{ "$defs": { "AEROSTRUCENUM": { "description": "", "enum": [ "glider", "plane" ], "title": "AEROSTRUCENUM", "type": "string" }, "ANIMALBODYCONDENUM": { "description": "", "enum": [ "normal", "over conditioned", "under conditioned" ], "title": "ANIMALBODYCONDENUM", "type": "string" }, "ANIMALSEXENUM": { "description": "", "enum": [ "castrated female", "castrated male", "intact female", "intact male" ], "title": "ANIMALSEXENUM", "type": "string" }, "ARCHSTRUCENUM": { "description": "", "enum": [ "building", "home", "shed" ], "title": "ARCHSTRUCENUM", "type": "string" }, "Agriculture": { "additionalProperties": false, "description": "agriculture extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. 
A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. 
This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. 
specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; information about treatment involving use of growth hormones; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of ph 
of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "photosynt_activ": { "description": "Measurement of photosynthetic activity (i.e. leaf gas exchange / chlorophyll fluorescence emissions / reflectance / transpiration). Please also include the method term detailing the method of activity measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "photosynt_activ_meth": { "description": "Reference or method used in measurement of photosynthetic activity", "items": { "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). 
If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. 
sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. 
Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. 
Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_location": { "description": "The location the sequencing run was performed. 
Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of some soil.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_pH": { "description": "pH of some soil.", "type": "number" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "water_pH": { "description": "The pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_shared": { "description": "Other users sharing access to the same water source. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "chem_administration", "food_source", "specific_host", "pathogenicity", "temp", "samp_store_dur", "samp_store_loc", "samp_collect_device", "samp_mat_process", "host_age", "host_common_name", "host_genotype", "host_height", "host_length", "host_life_stage", "host_phenotype", "host_taxid", "host_tot_mass", "host_spec_range", "samp_size", "seq_meth", "samp_vol_we_dna_ext", "pcr_primers", "nucl_acid_ext", "nucl_acid_amp", "lib_reads_seqd", "lib_vector", "lib_screen", "target_gene", "target_subfragment", "mid", "adapters", "pcr_cond", "chimera_check", "assembly_name", "soil_type", "soil_type_meth", "store_cond", "microbial_biomass", "micro_biomass_meth", "sieving", "pool_dna_extracts" ], "title": "Agriculture", "type": "object" }, "Air": { "additionalProperties": false, "description": "air extension", "properties": { "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of 
objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "barometric_press": { "description": "Force per unit area exerted against a surface by the weight of air above that surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_monoxide": { "description": "Carbon monoxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "oxygen": { "description": "Oxygen (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pollutants": { "description": "Pollutant types and, amount or concentrations measured at the time of sampling; can report multiple pollutants by entering numeric values preceded by name of pollutant", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "volatile_org_comp": { "description": "Concentration of carbon-based chemicals that easily evaporate at room temperature; can report multiple volatile organic compounds by entering numeric values preceded by name of compound", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name", "alt" ], "title": "Air", "type": "object" }, "BINPARAMENUM": { "description": "", "enum": [ "codon usage", "combination", "coverage", "homology search", "kmer" ], "title": "BINPARAMENUM", "type": "string" }, "BIOTICRELATIONSHIPENUM": { "description": "", "enum": [ "commensalism", "free living", "mutualism", "parasitism", "symbiotic" ], "title": "BIOTICRELATIONSHIPENUM", "type": "string" }, "BUILDDOCSENUM": { "description": "", "enum": [ "building information model", "commissioning report", "complaint logs", "contract administration", "cost estimate", "janitorial schedules or logs", "maintenance plans", "schedule", "sections", "shop drawings", "submittals", "ventilation system", "windows" ], "title": "BUILDDOCSENUM", "type": "string" }, "BUILDINGSETTINGENUM": { "description": "", "enum": [ "exurban", "rural", "suburban", "urban" ], "title": "BUILDINGSETTINGENUM", "type": "string" }, "BUILDOCCUPTYPEENUM": { "description": "", "enum": [ "airport", "commercial", "health care", "high rise", "low rise", "market", "office", "residence", "residential", "restaurant", "school", "sports complex", "wood framed" ], 
"title": "BUILDOCCUPTYPEENUM", "type": "string" }, "BUILTSTRUCSETENUM": { "description": "", "enum": [ "rural", "urban" ], "title": "BUILTSTRUCSETENUM", "type": "string" }, "BuiltEnvironment": { "additionalProperties": false, "description": "built environment extension", "properties": { "abs_air_humidity": { "description": "Actual mass of water vapor - mh20 - present in the air water vapor mixture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "address": { "description": "The street name and building number where the sampling occurred", "type": "string" }, "adj_room": { "description": "List of rooms (room number, room name) immediately adjacent to the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "aero_struc": { "$ref": "#/$defs/AEROSTRUCENUM", "description": "Aerospace structures typically consist of thin plates with stiffeners for the external surfaces, bulkheads and frames to support the shape and fasteners such as welds, rivets, screws and bolts to hold the components together" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amount_light": { "description": "The unit of illuminance and luminous emittance, measuring luminous flux per unit area", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "arch_struc": { "$ref": "#/$defs/ARCHSTRUCENUM", "description": "An architectural structure is a human-made, free-standing, immobile outdoor construction" }, "avg_dew_point": { "description": "The average of dew point measures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "avg_temp": { "description": "The average of temperatures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bathroom_count": { "description": "The number of bathrooms in the building", "type": "integer" }, "bedroom_count": { "description": "The number of bedrooms in the building", "type": "integer" }, "build_docs": { "$ref": "#/$defs/BUILDDOCSENUM", "description": "The building design, construction and operation documents" }, "build_occup_type": { "description": "The primary function for which a building or discrete part of a building is intended to be used", "items": { "$ref": "#/$defs/BUILDOCCUPTYPEENUM" }, "type": "array" }, "building_setting": { "$ref": "#/$defs/BUILDINGSETTINGENUM", "description": "A location (geography) where a building is set" }, "built_struc_age": { "description": "The age of the built structure since construction", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "built_struc_set": { "$ref": "#/$defs/BUILTSTRUCSETENUM", "description": "The characterization of the location of the built structure as high or low human density" }, "built_struc_type": { "description": "A physical structure that is a body or assemblage of bodies in space to form a system capable of supporting loads", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_area": { "description": "The area of the ceiling space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the ceiling at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "ceil_finish_mat": { "$ref": "#/$defs/CEILFINISHMATENUM", "description": "The type of material used to finish a ceiling" }, "ceil_struc": { "$ref": "#/$defs/CEILSTRUCENUM", "description": "The construction format of the ceiling" }, "ceil_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a ceiling surface" }, "ceil_thermal_mass": { "description": "The ability of the ceiling to provide inertia against temperature fluctuations. Generally this means concrete that is exposed. A metal deck that supports a concrete slab will act thermally as long as it is exposed to room air flow", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_type": { "$ref": "#/$defs/CEILTYPEENUM", "description": "The type of ceiling according to the ceiling's appearance or construction" }, "ceil_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the ceiling" }, "cool_syst_id": { "description": "The cooling system identifier", "type": "integer" }, "date_last_rain": { "description": "The date of the last time it rained", "format": "date-time", "type": "string" }, "dew_point": { "description": "The temperature to which a given parcel of humid air must be cooled, at constant barometric pressure, for water vapor to condense into water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_comp_type": { "$ref": "#/$defs/DOORCOMPTYPEENUM", "description": "The composite type of the door" }, "door_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the door" }, "door_direct": { "$ref": "#/$defs/DOORDIRECTENUM", "description": "The direction the door opens" }, "door_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the door in the room" }, "door_mat": { "$ref": "#/$defs/DOORMATENUM", "description": "The material the door is composed of" }, "door_move": { "$ref": "#/$defs/DOORMOVEENUM", "description": "The type of movement of the door" }, "door_size": { "description": "The size of the door", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_type": { "$ref": "#/$defs/DOORTYPEENUM", "description": "The type of door material" }, "door_type_metal": { "$ref": "#/$defs/DOORTYPEMETALENUM", "description": "The type of metal door" }, "door_type_wood": { "description": "The type of wood door", "type": "string" }, "door_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a door" }, "drawings": { "$ref": "#/$defs/DRAWINGSENUM", "description": "The building's architectural drawings; if design is chosen, indicate phase-conceptual, schematic, design development, and construction documents" }, "elevator": { "description": "The number of elevators within the built structure", "type": "integer" }, "escalator": { "description": "The number of escalators within the built structure", "type": "integer" }, "exp_duct": { "description": "The amount of exposed ductwork in the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "exp_pipe": { "description": "The number of exposed pipes in the room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ext_door": { "description": "The number of exterior doors in the built structure", "type": "integer" }, "ext_wall_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The orientation of the exterior wall" }, "ext_window_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The compass direction the exterior window of the room is facing" }, "filter_type": { "description": "A device which removes solid particulates or airborne molecular contaminants", "items": { "$ref": "#/$defs/FILTERTYPEENUM" }, "type": "array" }, "fireplace_type": { "$ref": "#/$defs/FIREPLACETYPEENUM", "description": "A firebox with chimney" }, "floor_age": { "description": "The time period since installment of the carpet or flooring", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_area": { "description": "The area of the floor space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the floor at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "floor_count": { "description": "The number of floors in the building, including basements and mechanical penthouse", "type": "integer" }, "floor_finish_mat": { "description": "The floor covering type; the finished surface that is walked on", "type": "string" }, "floor_struc": { "$ref": "#/$defs/FLOORSTRUCENUM", "description": "Refers to the structural elements and subfloor upon which the finish flooring is installed" }, "floor_thermal_mass": { "description": "The ability of the floor to provide inertia against temperature fluctuations", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_water_mold": { "$ref": "#/$defs/FLOORWATERMOLDENUM", "description": "Signs of the presence of mold or mildew in a room" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. 
Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "freq_cook": { "description": "The number of times a meal is cooked per week", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "furniture": { "$ref": "#/$defs/FURNITUREENUM", "description": "The types of furniture present in the sampled room" }, "gender_restroom": { "$ref": "#/$defs/GENDERRESTROOMENUM", "description": "The gender type of the restroom" }, "hall_count": { "description": "The total count of hallways and corridors in the built structure", "type": "integer" }, "handidness": { "$ref": "#/$defs/HANDIDNESSENUM", "description": "The handedness of the individual sampled" }, "heat_cool_type": { "description": "Methods of conditioning or heating a room or building", "items": { "$ref": "#/$defs/HEATCOOLTYPEENUM" }, "type": "array" }, "heat_deliv_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The location of heat delivery within the room" }, "heat_sys_deliv_meth": { "$ref": "#/$defs/HEATSYSDELIVMETHENUM", "description": "The method by which the heat is delivered through the system" }, "heat_system_id": { "description": "The heating system identifier", "type": "integer" }, "height_carper_fiber": { "description": "The average carpet fiber height in the indoor environment", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "indoor_space": { "$ref": "#/$defs/INDOORSPACEENUM", "description": "A distinguishable space within a structure, the purpose for which discrete areas of a building is used" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "inside_lux": { "description": "The recorded value at sampling time (power density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "int_wall_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the wall at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "last_clean": { "description": "The last time the floor was cleaned (swept, mopped, vacuumed)", "format": "date-time", "type": "string" }, "light_type": { "description": "Application of light to achieve some practical or aesthetic effect. Lighting includes the use of both artificial light sources such as lamps and light fixtures, as well as natural illumination by capturing daylight. Can also include absence of light", "items": { "$ref": "#/$defs/LIGHTTYPEENUM" }, "type": "array" }, "max_occup": { "description": "The maximum amount of people allowed in the indoor environment", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mech_struc": { "$ref": "#/$defs/MECHSTRUCENUM", "description": "mechanical structure: a moving structure" }, "number_pets": { "description": "The number of pets residing in the sampled space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_plants": { "description": "The number of plant(s) in the sampling space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_resident": { "description": "The number of individuals currently occupying in the sampling location", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "occup_density_samp": { "description": "Average number of occupants at time of sampling per square footage", "type": "number" }, "occup_document": { "$ref": "#/$defs/OCCUPDOCUMENTENUM", "description": "The type of documentation of occupancy" }, "occup_samp": { "description": "Number of occupants present at time of sample within the given space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, 
"organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "quad_pos": { "$ref": "#/$defs/QUADPOSENUM", "description": "The quadrant position of the sampling room within the building" }, "rel_air_humidity": { "description": "Partial vapor and air pressure, density of the vapor and air, or by the actual mass of the vapor and air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "rel_humidity_out": { "description": "The recorded outside relative humidity value at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_samp_loc": { "$ref": "#/$defs/RELSAMPLOCENUM", "description": "The sampling location within the train car" }, "room_air_exch_rate": { "description": "The rate at which outside air replaces indoor air in a given space", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_architec_elem": { "description": "The unique details and component parts that, together, form the architecture of a distinguishable space within a built structure", "type": "string" }, "room_condt": { "$ref": "#/$defs/ROOMCONDTENUM", "description": "The condition of the room at the time of sampling" }, "room_connected": { "$ref": "#/$defs/ROOMCONNECTEDENUM", "description": "List of rooms connected to the sampling room by a doorway" }, "room_count": { "description": "The total count of rooms in the built structure including all room types", "type": "integer" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "room_door_dist": { "description": "Distance between doors (meters) in the hallway between the sampling room and adjacent rooms", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_door_share": { "description": "List of room(s) (room number, room name) sharing a door with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_hallway": { "description": "List of room(s) (room number, room name) located in the same hallway as sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_loc": { "$ref": "#/$defs/ROOMLOCENUM", "description": "The position of the room within the building" }, "room_moist_dam_hist": { "description": "The history of moisture damage or mold in the past 12 months. Number of events of moisture damage or mold observed", "type": "integer" }, "room_net_area": { "description": "The net floor area of sampling room. 
Net area excludes wall thicknesses", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_occup": { "description": "Count of room occupancy at time of sampling", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_samp_pos": { "$ref": "#/$defs/ROOMSAMPPOSENUM", "description": "The horizontal sampling position in the room relative to architectural elements" }, "room_type": { "description": "The main purpose or activity of the sampling room. A room is any distinguishable space within a structure", "type": "string" }, "room_vol": { "description": "Volume of sampling room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_wall_share": { "description": "List of room(s) (room number, room name) sharing a wall with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_window_count": { "description": "Number of windows in the room", "type": "integer" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_room_id": { "description": "Sampling room number. 
This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_sort_meth": { "description": "Method by which samples are sorted; open face filter collecting total suspended particles, prefilter to remove particles larger than X micrometers in diameter, where common values of X would be 10 and 2.5 full size sorting in a cascade impactor", "items": { "type": "string" }, "type": "array" }, "samp_time_out": { "description": "The recent and long term history of outside sampling", "type": "string" }, "samp_weather": { "$ref": "#/$defs/SAMPWEATHERENUM", "description": "The weather on the sampling day" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_use": { "$ref": "#/$defs/SEASONUSEENUM", "description": "The seasons the space is occupied" }, "shad_dev_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the shading device" }, "shading_device_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the shading device at the time of sampling" }, "shading_device_loc": { "$ref": "#/$defs/SHADINGDEVICELOCENUM", "description": "The location of the shading device in relation to the built structure" }, "shading_device_mat": { "description": "The material the shading device is composed of", "type": "string" }, "shading_device_type": { "$ref": "#/$defs/SHADINGDEVICETYPEENUM", "description": "The type of shading device" }, "space_typ_state": { "$ref": "#/$defs/SPACETYPSTATEENUM", "description": "Customary or normal state of the space" }, "specific": { "$ref": "#/$defs/SPECIFICENUM", "description": "The building specifications. 
If design is chosen, indicate phase: conceptual, schematic, design development, construction documents" }, "specific_humidity": { "description": "The mass of water vapour in a unit mass of moist air, usually expressed as grams of vapour per kilogram of air, or, in air conditioning, as grains per pound", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "substructure_type": { "description": "The substructure or under building is that largely hidden section of the building which is built off the foundations to the ground floor level", "items": { "$ref": "#/$defs/SUBSTRUCTURETYPEENUM" }, "type": "array" }, "surf_air_cont": { "description": "Contaminant identified on surface", "items": { "$ref": "#/$defs/SURFAIRCONTENUM" }, "type": "array" }, "surf_humidity": { "description": "Surfaces: water activity as a function of air and material moisture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "surf_moisture": { "description": "Water held on a surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "surf_moisture_ph": { "description": "ph measurement of surface", "type": "number" }, "surf_temp": { "description": "Temperature of the surface at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp_out": { "description": "The recorded temperature value at sampling time outside", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "train_line": { "$ref": "#/$defs/TRAINLINEENUM", "description": "The subway line name" }, "train_stat_loc": { "$ref": "#/$defs/TRAINSTATLOCENUM", "description": "The train station collection location" }, "train_stop_loc": { "$ref": "#/$defs/TRAINSTOPLOCENUM", "description": "The train stop collection location" }, "typ_occup_density": { "description": "Customary or normal density of occupants", "type": "number" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "vis_media": { "description": "The building visual media", "type": "string" }, "wall_area": { "description": "The total area of the sampled room's walls", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_const_type": { "$ref": "#/$defs/WALLCONSTTYPEENUM", "description": "The building class of the wall defined by the composition of the building elements and fire-resistance rating" }, "wall_finish_mat": { "$ref": "#/$defs/WALLFINISHMATENUM", "description": "The material utilized to finish the outer most layer of the wall" }, "wall_height": { "description": "The average height of the walls in the sampled room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the wall within the room" }, "wall_surf_treatment": { "$ref": "#/$defs/WALLSURFTREATMENTENUM", "description": "The surface treatment of interior wall" }, "wall_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a wall surface" }, "wall_thermal_mass": { "description": "The ability of the wall to provide inertia against temperature fluctuations. 
Generally this means concrete or concrete block that is either exposed or covered only with paint", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a wall" }, "water_feat_size": { "description": "The size of the water feature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_feat_type": { "$ref": "#/$defs/WATERFEATTYPEENUM", "description": "The type of water feature present within the building being sampled" }, "weekday": { "$ref": "#/$defs/WEEKDAYENUM", "description": "The day of the week when sampling occurred" }, "window_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the window at the time of sampling" }, "window_cover": { "$ref": "#/$defs/WINDOWCOVERENUM", "description": "The type of window covering" }, "window_horiz_pos": { "$ref": "#/$defs/WINDOWHORIZPOSENUM", "description": "The horizontal position of the window on the wall" }, "window_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the window within the room" }, "window_mat": { "$ref": "#/$defs/WINDOWMATENUM", "description": "The type of material used to finish a window" }, "window_open_freq": { "description": "The number of times windows are opened per week", "type": "integer" }, "window_size": { "description": "The window's length and width", "type": "string" }, "window_status": { "$ref": "#/$defs/WINDOWSTATUSENUM", "description": "Defines whether the windows were open or closed during environmental testing" }, "window_type": { "$ref": "#/$defs/WINDOWTYPEENUM", "description": "The type of windows" }, "window_vert_pos": { "$ref": "#/$defs/WINDOWVERTPOSENUM", "description": "The vertical position of 
the window on the wall" }, "window_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the window" } }, "required": [ "samp_name", "project_name", "surf_material", "rel_air_humidity", "abs_air_humidity", "air_temp", "build_occup_type", "carb_dioxide", "ventilation_type", "organism_count", "indoor_space", "indoor_surf", "filter_type", "heat_cool_type", "building_setting", "light_type", "space_typ_state", "typ_occup_density", "occup_samp", "occup_density_samp" ], "title": "BuiltEnvironment", "type": "object" }, "CEILFINISHMATENUM": { "description": "", "enum": [ "PVC", "drywall", "fiberglass", "metal", "mineral fibre", "mineral wool/calcium silicate", "plasterboard", "stucco", "tiles", "wood" ], "title": "CEILFINISHMATENUM", "type": "string" }, "CEILSTRUCENUM": { "description": "", "enum": [ "concrete", "wood frame" ], "title": "CEILSTRUCENUM", "type": "string" }, "CEILTYPEENUM": { "description": "", "enum": [ "barrel-shaped", "cathedral", "coffered", "concave", "cove", "dropped", "stretched" ], "title": "CEILTYPEENUM", "type": "string" }, "COMPLAPPRENUM": { "description": "", "enum": [ "marker gene", "other", "reference based" ], "title": "COMPLAPPRENUM", "type": "string" }, "CONTAMSCREENINPUTENUM": { "description": "", "enum": [ "contigs", "reads" ], "title": "CONTAMSCREENINPUTENUM", "type": "string" }, "CULTRESULTENUM": { "description": "", "enum": [ "absent", "active", "inactive", "negative", "no", "positive", "present", "yes" ], "title": "CULTRESULTENUM", "type": "string" }, "Checklist": { "additionalProperties": false, "description": "A collection of metadata terms (slots) to minimally describe the sampling and sequencing method of a specimen used to generate a nucleotide sequence.", "title": "Checklist", "type": "object" }, "DEPOSENVENUM": { "description": "", "enum": [ "Continental - Aeolian", "Continental - Alluvial", "Continental - Fluvial", "Continental - Lacustrine", "Marine - Deep", "Marine - Reef", 
"Marine - Shallow", "Other - Evaporite", "Other - Glacial", "Other - Volcanic", "Transitional - Beach", "Transitional - Deltaic", "Transitional - Lagoonal", "Transitional - Lake", "Transitional - Tidal", "other" ], "title": "DEPOSENVENUM", "type": "string" }, "DOMINANTHANDENUM": { "description": "", "enum": [ "ambidextrous", "left", "right" ], "title": "DOMINANTHANDENUM", "type": "string" }, "DOORCOMPTYPEENUM": { "description": "", "enum": [ "metal covered", "revolving", "sliding", "telescopic" ], "title": "DOORCOMPTYPEENUM", "type": "string" }, "DOORDIRECTENUM": { "description": "", "enum": [ "inward", "outward", "sideways" ], "title": "DOORDIRECTENUM", "type": "string" }, "DOORMATENUM": { "description": "", "enum": [ "aluminum", "cellular PVC", "engineered plastic", "fiberboard", "fiberglass", "metal", "thermoplastic alloy", "vinyl", "wood", "wood/plastic composite" ], "title": "DOORMATENUM", "type": "string" }, "DOORMOVEENUM": { "description": "", "enum": [ "collapsible", "folding", "revolving", "rolling shutter", "sliding", "swinging" ], "title": "DOORMOVEENUM", "type": "string" }, "DOORTYPEENUM": { "description": "", "enum": [ "composite", "metal", "wooden" ], "title": "DOORTYPEENUM", "type": "string" }, "DOORTYPEMETALENUM": { "description": "", "enum": [ "collapsible", "corrugated steel", "hollow", "rolling shutters", "steel plate" ], "title": "DOORTYPEMETALENUM", "type": "string" }, "DRAINAGECLASSENUM": { "description": "", "enum": [ "excessively drained", "moderately well", "poorly", "somewhat poorly", "very poorly", "well" ], "title": "DRAINAGECLASSENUM", "type": "string" }, "DRAWINGSENUM": { "description": "", "enum": [ "as built", "bid", "building navigation map", "construction", "design", "diagram", "operation", "sketch" ], "title": "DRAWINGSENUM", "type": "string" }, "EXTRWEATHEREVENTENUM": { "description": "", "enum": [ "drought", "dust storm", "extreme cold", "extreme heat", "flood", "frost", "hail", "high precipitation", "high winds" ], "title": 
"EXTRWEATHEREVENTENUM", "type": "string" }, "Extension": { "additionalProperties": false, "description": "A collection of recommended metadata terms (slots) developed by community experts, describing the specific context under which a sample was collected.", "title": "Extension", "type": "object" }, "FACILITYTYPEENUM": { "description": "", "enum": [ "ambient storage", "caterer-catering point", "distribution", "frozen storage", "importer-broker", "interstate conveyance", "labeler-relabeler", "manufacturing-processing", "packaging", "refrigerated storage", "storage" ], "title": "FACILITYTYPEENUM", "type": "string" }, "FAOCLASSENUM": { "description": "", "enum": [ "Acrisols", "Andosols", "Arenosols", "Cambisols", "Chernozems", "Ferralsols", "Fluvisols", "Gleysols", "Greyzems", "Gypsisols", "Histosols", "Kastanozems", "Lithosols", "Luvisols", "Nitosols", "Phaeozems", "Planosols", "Podzols", "Podzoluvisols", "Rankers", "Regosols", "Rendzinas", "Solonchaks", "Solonetz", "Vertisols", "Yermosols" ], "title": "FAOCLASSENUM", "type": "string" }, "FARMWATERSOURCEENUM": { "description": "", "enum": [ "brackish", "canal", "collected rainwater", "ditch", "estuary", "freshwater", "lake", "manmade", "melt pond", "municipal", "natural", "pond", "reservior", "river", "saline", "storage tank", "stream", "well" ], "title": "FARMWATERSOURCEENUM", "type": "string" }, "FILTERTYPEENUM": { "description": "", "enum": [ "HEPA", "chemical air filter", "electrostatic", "gas-phase or ultraviolet air treatments", "low-MERV pleated media", "particulate air filter" ], "title": "FILTERTYPEENUM", "type": "string" }, "FIREPLACETYPEENUM": { "description": "", "enum": [ "gas burning", "wood burning" ], "title": "FIREPLACETYPEENUM", "type": "string" }, "FLOORSTRUCENUM": { "description": "", "enum": [ "balcony", "concrete", "floating floor", "glass floor", "raised floor", "sprung floor", "wood-framed" ], "title": "FLOORSTRUCENUM", "type": "string" }, "FLOORWATERMOLDENUM": { "description": "", "enum": [ 
"bulging walls", "ceiling discoloration", "condensation", "floor discoloration", "mold odor", "peeling paint or wallpaper", "wall discoloration", "water stains", "wet floor" ], "title": "FLOORWATERMOLDENUM", "type": "string" }, "FOODCLEANPROCENUM": { "description": "", "enum": [ "drum and drain", "manual spinner", "rinsed with sanitizer solution", "rinsed with water", "scrubbed with brush", "scrubbed with hand", "soaking" ], "title": "FOODCLEANPROCENUM", "type": "string" }, "FOODTRACELISTENUM": { "description": "", "enum": [ "cheeses-other than hard cheeses", "crustaceans", "cucumbers", "finfish-including smoked finfish", "fruits and vegetables-fresh cut", "herbs-fresh", "leafy greens-including fresh cut leafy greens", "melons", "mollusks-bivalves", "nut butter", "peppers", "ready to eat deli salads", "shell eggs", "sprouts", "tomatoes", "tropical tree fruits" ], "title": "FOODTRACELISTENUM", "type": "string" }, "FREQCLEANENUM": { "description": "", "enum": [ "Annually", "Daily", "Monthly", "Quarterly", "Weekly", "other" ], "title": "FREQCLEANENUM", "type": "string" }, "FURNITUREENUM": { "description": "", "enum": [ "cabinet", "chair", "desks" ], "title": "FURNITUREENUM", "type": "string" }, "FoodAnimalAndAnimalFeed": { "additionalProperties": false, "description": "food-animal and animal feed extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). 
Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "animal_am": { "description": "The name(s) (generic or brand) of the antimicrobial(s) given to the food animal within the last 30 days", "type": "string" }, "animal_am_dur": { "description": "The duration of time (days) that the antimicrobial was administered to the food animal", "type": "string" }, "animal_am_freq": { "description": "The frequency per day that the antimicrobial was administered to the food animal", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "animal_am_route": { "description": "The route by which the antimicrobial is administered into the body of the food animal", "type": "string" }, "animal_am_use": { "description": "The prescribed intended use of or the condition treated by the antimicrobial given to the food animal by any route of administration", "type": "string" }, "animal_body_cond": { "$ref": "#/$defs/ANIMALBODYCONDENUM", "description": "Body condition scoring is a production management tool used to evaluate overall health and nutritional needs of a food animal. 
Because there are different scoring systems, this field is restricted to three categories" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes |Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. 
This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_sex": { "$ref": "#/$defs/ANIMALSEXENUM", "description": "The sex and reproductive status of the food animal" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). 
Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. 
This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. 
Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). 
Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. 
This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_source_age": { "description": "The age of the food source host organism. Depending on the type of host organism, age may be more appropriate to report in days, weeks, or years", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. 
Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. 
The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). 
This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. 
It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. 
This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. 
Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. 
Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. 
This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "project_name", "lat_lon", "geo_loc_name", "collection_date", "seq_meth", "coll_site_geo_feat", "food_origin", "food_prod", "food_product_type", "IFSAC_category", "intended_consumer", "samp_purpose" ], "title": "FoodAnimalAndAnimalFeed", "type": "object" }, "FoodFarmEnvironment": { "additionalProperties": false, "description": "food-farm environment extension", "properties": { "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products). 
An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "air_flow_impede": { "description": "Presence of objects in the area that would influence or impede air flow through the air filter", "items": { "type": "string" }, "type": "array" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org.
Multiple terms may apply and can be separated by pipes |Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_intrusion": { "description": "Identification of animals intruding on the sample or sample site including invertebrates (such as pests or pollinators) and vertebrates (such as wildlife or domesticated animals). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. 
Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "date_extr_weath": { "description": "Date of unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "format": "date-time", "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). 
Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "extr_weather_event": { "description": "Unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "$ref": "#/$defs/EXTRWEATHEREVENTENUM" }, "type": "array" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_san_freq": { "description": "The number of times farm equipment is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually", "type": "string" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416).
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fertilizer_date": { "description": "Date of administration of soil amendment or fertilizer. Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "format": "date-time", "type": "string" }, "food_clean_proc": { "$ref": "#/$defs/FOODCLEANPROCENUM", "description": "The process of cleaning food to separate other environmental materials from the food source. Multiple terms can be separated by pipes" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. 
This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, free-range, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. 
This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel.
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_medium": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). The name of the medium used to grow the microorganism", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g.
Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. 
The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g.
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_part_maturity": { "description": "A description of the stage of development of a plant or plant part based on maturity or ripeness. This field accepts terms listed under degree of plant maturity (http://purl.obolibrary.org/obo/FOODON_03530050)", "type": "string" }, "plant_reprod_crop": { "description": "Plant reproductive part used in the field during planting to start the crop", "items": { "$ref": "#/$defs/PLANTREPRODCROPENUM" }, "type": "array" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006).
Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, free-range, industrial, hormone-free, antibiotic free, cage free. Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 
5.5", "type": "number" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells.
Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. 
This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of soil at time of sampling.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_pH": { "description": "The pH of soil at time of sampling.", "type": "number" }, "soil_porosity": { "description": "Porosity of soil or deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_class": { "$ref": "#/$defs/SOILTEXTURECLASSENUM", "description": "One of the 12 soil texture classes used to describe soil texture based on the relative proportion of different grain sizes of mineral particles [sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um)] in a soil" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "water_frequency": { "description": "Number of water delivery events within a given period of time", "type": "string" }, "water_pH": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_adjac": { "description": "Description of the environmental features that are adjacent to the farm water source. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "water_source_shared": { "description": "Other users sharing access to the same water source. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name", "lat_lon", "geo_loc_name", "collection_date", "seq_meth", "depth", "biotic_regm", "chem_administration", "food_product_type", "samp_type", "IFSAC_category" ], "title": "FoodFarmEnvironment", "type": "object" }, "FoodFoodProductionFacility": { "additionalProperties": false, "description": "food-food production facility extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "area_samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "type": "string" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide_used": { "description": "Substance intended for preventing, neutralizing, destroying, repelling, or mitigating the effects of any pest or microorganism; that inhibits the growth, reproduction, and activity of organisms, including fungal cells; decreases the number of fungi or pests present; deters microbial growth and degradation of other ingredients in the formulation. Indicate the biocide used on the location where the sample was taken. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). 
Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_monitoring_zone": { "description": "An environmental monitoring zone is a formal designation as part of an environmental monitoring program, in which areas of a food production facility are categorized, commonly as zones 1-4, based on likelihood or risk of foodborne pathogen contamination. This field accepts terms listed under food production environmental monitoring zone (http://purl.obolibrary.org/obo/ENVO). Please add a term to indicate the environmental monitoring zone the sample was taken from", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "facility_type": { "description": "Establishment details about the type of facility where the sample was taken. This is independent of the specific product(s) within the facility", "items": { "$ref": "#/$defs/FACILITYTYPEENUM" }, "type": "array" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. 
See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. 
http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. 
This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. 
This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, free-range, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. 
This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. 
The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. 
Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hygienic_area": { "description": "The subdivision of areas within a food production facility according to hygienic requirements. This field accepts terms listed under hygienic food production area (http://purl.obolibrary.org/obo/ENVO). Please add a term that most accurately indicates the hygienic area your sample was taken from according to the definitions provided", "type": "string" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material", "items": { "type": "string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). 
This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free. Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_loc_condition": { "$ref": "#/$defs/SAMPLOCCONDITIONENUM", "description": "The condition of the sample location at the time of sampling" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_surf_moisture": { "description": "Degree of water held on a sampled surface. If present, user can state the degree of water held on surface (intermittent moisture, submerged). If no surface moisture is present indicate not present", "items": { "$ref": "#/$defs/SAMPSURFMOISTUREENUM" }, "type": "array" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. 
-20 or 4 degree Celsius", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. 
Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "spec_intended_cons": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "ster_meth_samp_room": { "description": "The method used to sterilize the sampling room. This field accepts terms listed under electromagnetic radiation (http://purl.obolibrary.org/obo/ENVO_01001026). If the proper descriptor is not listed, please use text to describe the sampling room sterilization method. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "project_name", "lat_lon", "geo_loc_name", "collection_date", "seq_meth", "coll_site_geo_feat", "samp_source_mat_cat", "samp_type", "samp_stor_media", "samp_stor_device", "food_product_type", "IFSAC_category", "food_product_qual", "food_contact_surf" ], "title": "FoodFoodProductionFacility", "type": "object" }, "FoodHumanFoods": { "additionalProperties": false, "description": "food-human foods extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. 
An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. 
This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "ferm_chem_add": { "description": "Any chemicals that are added to the fermentation process to achieve the desired final product", "items": { "type": "string" }, "type": "array" }, "ferm_chem_add_perc": { "description": "The amount of chemical added to the fermentation process", "items": { "type": "number" }, "type": "array" }, "ferm_headspace_oxy": { "description": "The amount of headspace oxygen in a fermentation vessel", "type": "number" }, "ferm_medium": { "description": "The growth medium used for the fermented food fermentation process, which supplies the required nutrients. 
Usually this includes a carbon and nitrogen source, water, micronutrients and chemical additives", "type": "string" }, "ferm_pH": { "description": "The pH of the fermented food fermentation process", "type": "number" }, "ferm_rel_humidity": { "description": "The relative humidity of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_temp": { "description": "The temperature of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_time": { "description": "The time duration of the fermented food fermentation process", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "ferm_vessel": { "description": "The type of vessel used for containment of the fermentation", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. 
This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. 
http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. 
This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). 
Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. 
This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "microb_start": { "description": "Any type of microorganisms used in food production. This field accepts terms listed under live organisms for food production (http://purl.obolibrary.org/obo/FOODON_0344453)", "type": "string" }, "microb_start_count": { "description": "Total cell count of starter culture per gram, volume or area of sample and the method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example : total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "microb_start_inoc": { "description": "The amount of starter culture used to inoculate a new batch", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "microb_start_prep": { "description": "Information about the protocol or method used to prepare the starter inoculum", "type": "string" }, "microb_start_source": { "description": "The source from which the microbial starter culture was sourced. If commercially supplied, list supplier", "type": "string" }, "microb_start_taxID": { "description": "Please include Genus species and strain ID, if known of microorganisms used in food production. For complex communities, pipes can be used to separate two or more microbes", "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": "string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. 
Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. 
Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "project_name", "lat_lon", "geo_loc_name", "collection_date", "seq_meth", "coll_site_geo_feat", "food_product_type", "IFSAC_category" ], "title": "FoodHumanFoods", "type": "object" }, "GENDERRESTROOMENUM": { "description": "", "enum": [ "all gender", "female", "gender neurtral", "male", "male and female", "unisex" ], "title": "GENDERRESTROOMENUM", "type": "string" }, "GROWTHHABITENUM": { "description": "", "enum": [ "erect", "prostrate", "semi-erect", "spreading" ], "title": "GROWTHHABITENUM", "type": "string" }, "HANDIDNESSENUM": { "description": "", "enum": [ "ambidexterity", "left handedness", "mixed-handedness", "right handedness" ], "title": "HANDIDNESSENUM", "type": "string" }, "HCPRODUCEDENUM": { "description": "", "enum": [ "Bitumen", "Coalbed Methane", "Gas", "Gas-Condensate", "Oil", "other" ], "title": "HCPRODUCEDENUM", "type": "string" }, "HCRENUM": { "description": "", "enum": [ "Coalbed", "Gas Reservoir", "Oil Reservoir", "Oil Sand", "Shale", "Tight Gas Reservoir", "Tight Oil Reservoir", "other" ], "title": "HCRENUM", "type": "string" }, "HEATCOOLTYPEENUM": { "description": "", "enum": [ "forced air system", "heat pump", "radiant system", "steam forced heat", "wood stove" ], "title": "HEATCOOLTYPEENUM", "type": "string" }, "HEATSYSDELIVMETHENUM": { "description": "", "enum": [ "conductive", "radiant" ], "title": "HEATSYSDELIVMETHENUM", "type": "string" }, "HOSTCELLULARLOCENUM": { "description": "", "enum": [ "extracellular", "intracellular", "not determined" ], "title": "HOSTCELLULARLOCENUM", "type": "string" }, "HOSTDEPENDENCEENUM": { "description": "", "enum": [ "facultative", "obligate" ], "title": "HOSTDEPENDENCEENUM", "type": "string" }, "HOSTPREDAPPRENUM": { 
"description": "", "enum": [ "CRISPR spacer match", "co-occurrence", "combination", "host sequence similarity", "kmer similarity", "other", "provirus" ], "title": "HOSTPREDAPPRENUM", "type": "string" }, "HOSTSPECIFICITYENUM": { "description": "", "enum": [ "family-specific", "generalist", "genus-specific", "species-specific" ], "title": "HOSTSPECIFICITYENUM", "type": "string" }, "HostAssociated": { "additionalProperties": false, "description": "host-associated extension", "properties": { "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "blood_press_diast": { "description": "Resting diastolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "blood_press_syst": { "description": "Resting systolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. 
Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name" ], "title": "HostAssociated", "type": "object" }, "HumanAssociated": { "additionalProperties": false, "description": "human-associated extension", "properties": { "amniotic_fluid_color": { "description": "Specification of the color of the amniotic fluid sample", "type": "string" }, "blood_blood_disord": { "description": "History of blood disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, hematopoietic system disease (https://disease-ontology.org/?id=DOID:74)", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "diet_last_six_month": { "description": "Specification of major diet changes in the last six months, if yes the change should be specified", "type": "string" }, "drug_usage": { "description": "Any drug used by subject and the frequency of usage; can include multiple drugs used", "items": { "type": "string" }, "type": "array" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "foetal_health_stat": { "description": "Specification of foetal health status, should also include abortion", "type": "string" }, "gestation_state": { "description": "Specification of the gestation state", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. 
Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_hiv_stat": { "description": "HIV status of subject, if yes HAART initiation status should also be indicated as [YES or NO]", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "kidney_disord": { "description": "History of kidney disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, kidney disease (https://disease-ontology.org/?id=DOID:557)", "items": { "type": "string" }, "type": "array" }, "maternal_health_stat": { "description": "Specification of the maternal health status", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "nose_throat_disord": { "description": "History of nose-throat disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850), upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pet_farm_animal": { "description": "Specification of presence of pets or farm animals in the environment of subject, if yes the animals should be specified; can include multiple animals present", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "pulmonary_disord": { "description": "History of pulmonary disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850)", "items": { "type": "string" }, "type": "array" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. 
INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "smoker": { "description": "Specification of smoking status", "type": "boolean" }, "study_complt_stat": { "description": "Specification of study completion status, if no the reason should be specified", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "travel_out_six_month": { "description": "Specification of the countries travelled in the last six months; can include multiple travels", "items": { "type": "string" }, "type": "array" }, "twin_sibling": { "description": "Specification of twin sibling presence", "type": "boolean" }, "urine_collect_meth": { "$ref": "#/$defs/URINECOLLECTMETHENUM", "description": "Specification of urine collection method" }, "urogenit_tract_disor": { "description": "History of urogenital tract disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" }, "weight_loss_3_month": { "description": "Specification of weight loss in the last three months, if yes should be further specified to include amount of weight loss", "type": "string" } }, "required": [ "samp_name", "project_name" ], "title": "HumanAssociated", "type": "object" }, "HumanGut": { "additionalProperties": false, "description": "human-gut extension", "properties": { "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. 
https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "gastrointest_disord": { "description": "History of gastrointestinal tract disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, gastrointestinal system disease (https://disease-ontology.org/?id=DOID:77)", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "liver_disord": { "description": "History of liver disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, liver disease (https://disease-ontology.org/?id=DOID:409)", "items": { "type": "string" }, "type": "array" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "special_diet": { "description": "Specification of special diet; can include multiple special diets", "items": { "type": "string" }, "type": "array" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name" ], "title": "HumanGut", "type": "object" }, "HumanOral": { "additionalProperties": false, "description": "human-oral extension", "properties": { "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "nose_mouth_teeth_throat_disord": { "description": "History of nose/mouth/teeth/throat disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, nose disease (https://disease-ontology.org/?id=DOID:2825), mouth disease (https://disease-ontology.org/?id=DOID:403), tooth disease (https://disease-ontology.org/?id=DOID:1091), or upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_last_toothbrush": { "description": "Specification of the time since last toothbrushing", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "project_name" ], "title": "HumanOral", "type": "object" }, "HumanSkin": { "additionalProperties": false, "description": "human-skin extension", "properties": { "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "dermatology_disord": { "description": "History of dermatology disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, skin disease (https://disease-ontology.org/?id=DOID:37)", "items": { "type": "string" }, "type": "array" }, "dominant_hand": { "$ref": "#/$defs/DOMINANTHANDENUM", "description": "Dominant hand of the subject" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. 
While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_since_last_wash": { "description": "Specification of the time since last wash", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "project_name" ], "title": "HumanSkin", "type": "object" }, "HumanVaginal": { "additionalProperties": false, "description": "human-vaginal extension", "properties": { "birth_control": { "description": "Specification of birth control medication used", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "douche": { "description": "Date of most recent douche", "format": "date-time", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. 
https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "gynecologic_disord": { "description": "History of gynecological disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, female reproductive system disease (https://disease-ontology.org/?id=DOID:229)", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hrt": { "description": "Whether subject had hormone replacement therapy, and if yes start date", "format": "date-time", "type": "string" }, "hysterectomy": { "description": "Specification of whether hysterectomy was performed", "type": "boolean" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "menarche": { "description": "Date of most recent menstruation", "format": "date-time", "type": "string" }, "menopause": { "description": "Date of onset of menopause", "format": "date-time", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. 
qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pregnancy": { "description": "Date due of pregnancy", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sexual_act": { "description": "Current sexual partner and frequency of sex", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "urogenit_disord": { "description": "History of urogenital disorders, can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, reproductive system disease (https://disease-ontology.org/?id=DOID:15) or urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "project_name" ], "title": "HumanVaginal", "type": "object" }, "HydrocarbonResourcesCores": { "additionalProperties": false, "description": "hydrocarbon resources-cores extension", "properties": { "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. 
Mic)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. 
Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. (Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "owc_tvdss": { "description": "Depth of the original oil water contact (OWC) zone (average) (m TVDSS)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "permeability": { "description": "Measure of the ability of a hydrocarbon resource to allow fluids to pass through it. (Additional information: https://en.wikipedia.org/wiki/Permeability_(earth_sciences))", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_md": { "description": "In non deviated well, measured depth is equal to the true vertical depth, TVD (TVD=TVDSS plus the reference or datum it refers to). In deviated wells, the MD is the length of trajectory of the borehole measured from the same reference or datum. Common datums used are ground level (GL), drilling rig floor (DF), rotary table (RT), kelly bushing (KB) and mean sea level (MSL). If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. 
-80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_tvdss": { "description": "Depth of the sample i.e. The vertical distance between the sea level and the sampled position in the subsurface. Depth can be reported as an interval for subsurface samples e.g. 1325.75-1362.25 m", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. 
BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sr_dep_env": { "$ref": "#/$defs/SRDEPENVENUM", "description": "Source rock depositional environment (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of source rock (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_kerog_type": { "$ref": "#/$defs/SRKEROGTYPEENUM", "description": "Origin of kerogen. Type I: Algal (aquatic), Type II: planktonic and soft plant material (aquatic or terrestrial), Type III: terrestrial woody/ fibrous plant material (terrestrial), Type IV: oxidized recycled woody debris (terrestrial) (additional information: https://en.wikipedia.org/wiki/Kerogen). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_lithology": { "$ref": "#/$defs/SRLITHOLOGYENUM", "description": "Lithology of source rock (https://en.wikipedia.org/wiki/Source_rock). 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name", "hcr", "hc_produced", "basin", "hcr_temp", "sulfate_fw", "vfa_fw", "samp_type", "temp", "api" ], "title": "HydrocarbonResourcesCores", "type": "object" }, "HydrocarbonResourcesFluidsSwabs": { "additionalProperties": false, "description": "hydrocarbon resources-fluids/swabs extension", "properties": { "add_recov_method": { "description": "Additional (i.e. Secondary, tertiary, etc.) recovery methods deployed for increase of hydrocarbon recovery from resource and start date for each one of them. 
If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. 
There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide": { "description": "List of biocides (commercial name of product and supplier) and date of administration", "type": "string" }, "biocide_admin_method": { "description": "Method of biocide administration (dose, frequency, duration, time elapsed between last biociding and sampling) (e.g. 150 mg/l; weekly; 4 hr; 3 days)", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_treat_method": { "description": "Method of chemical administration(dose, frequency, duration, time elapsed between administration and sampling) (e.g. 50 mg/l; twice a week; 1 hr; 0 days)", "type": "string" }, "chem_treatment": { "description": "List of chemical compounds administered upstream the sampling location where sampling occurred (e.g. Glycols, H2S scavenger, corrosion and scale inhibitors, demulsifiers, and other production chemicals etc.). The commercial name of the product and name of the supplier should be provided. 
The date of administration should also be included", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. MIC)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" (HCR) is defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. 
Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "iw_bt_date_well": { "description": "Injection water breakthrough date per well following a secondary and/or tertiary recovery", "format": "date-time", "type": "string" }, "iwf": { "description": "Proportion of the produced fluids derived from injected water at the time of sampling. (e.g. 87%)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. (Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_rate": { "description": "Oil and/or gas production rates per well (e.g. 524 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_start_date": { "description": "Date of field's first production", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_point": { "$ref": "#/$defs/SAMPCOLLECTPOINTENUM", "description": "Sampling point on the asset where the sample was collected (e.g. Wellhead, storage tank, separator, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_loc_corr_rate": { "description": "Metal corrosion rate is the speed of metal deterioration due to environmental conditions. As environmental conditions change corrosion rates change accordingly. Therefore, long term corrosion rates are generally more informative than short term rates and for that reason they are preferred during reporting. In the case of suspected MIC, corrosion rate measurements at the time of sampling might provide insights into the involvement of certain microbial community members in MIC as well as potential microbial interplays", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. 
It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_preserv": { "description": "Preservative added to the sample (e.g. Rnalater, alcohol, formaldehyde, etc.). Where appropriate include volume added (e.g. Rnalater; 2 ml)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 
5.5 days; 20 C)", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "water_cut": { "description": "Current amount of water (%) in a produced fluid stream; or the average of the combined streams", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_prod_rate": { "description": "Water production rates per well (e.g. 987 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name", "hcr", "hc_produced", "basin", "water_cut", "iwf", "add_recov_method", "samp_type", "samp_collect_point", "temp", "sulfate", "sulfide", "nitrate", "api" ], "title": "HydrocarbonResourcesFluidsSwabs", "type": "object" }, "INDOORSPACEENUM": { "description": "", "enum": [ "bathroom", "bedroom", "elevator", "foyer", "hallway", "kitchen", "locker room", "office" ], "title": "INDOORSPACEENUM", "type": "string" }, "INDOORSURFENUM": { "description": "", "enum": [ "cabinet", "ceiling", "counter top", "door", "shelving", "vent cover", "wall", "window" ], "title": "INDOORSURFENUM", "type": "string" }, "LIBLAYOUTENUM": { "description": "", "enum": [ "other", "paired", "single", "vector" ], "title": "LIBLAYOUTENUM", "type": "string" }, "LIGHTTYPEENUM": { "description": "", "enum": [ "desk lamp", "electric light", "flourescent lights", "natural light", "none" ], "title": "LIGHTTYPEENUM", "type": "string" }, "LITHOLOGYENUM": { "description": "", "enum": [ "Basement", "Chalk", "Chert", "Coal", "Conglomerate", "Diatomite", "Dolomite", "Limestone", "Sandstone", "Shale", "Siltstone", "Volcanic", "other" ], "title": "LITHOLOGYENUM", "type": "string" }, "MAGCOVSOFTWAREENUM": { "description": "", "enum": [ "bbmap", "bowtie", "bwa", "other" ], "title": "MAGCOVSOFTWAREENUM", "type": "string" }, "MECHSTRUCENUM": { "description": "", "enum": [ "boat", "bus", "car", "carriage", "coach", "elevator", "escalator", "subway", "train" ], "title": "MECHSTRUCENUM", "type": "string" }, "MODETRANSMISSIONENUM": { "description": "", "enum": [ "horizontal:castrator", "horizontal:directly transmitted", "horizontal:micropredator", "horizontal:parasitoid", "horizontal:trophically transmitted", "horizontal:vector transmitted", "vertical" ], "title": "MODETRANSMISSIONENUM", "type": "string" }, "MicrobialMatBiofilm": { "additionalProperties": false, "description": "microbial mat/biofilm extension", 
"properties": { "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) 
Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name" ], "title": "MicrobialMatBiofilm", "type": "object" }, "MigsBaAgriculture": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the Agriculture Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. 
A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. 
The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). 
Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997).
If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel.
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; information about treatment involving use of growth hormones; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer 
or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of ph of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimen", "items": { "type": "string" }, "type": "array" }, "photosynt_activ": { "description": "Measurement of photosythetic activity (i.e. leaf gas exchange / chlorophyll fluorescence emissions / reflectance / transpiration) Please also include the term method term detailing the method of activity measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "photosynt_activ_meth": { "description": "Reference or method used in measurement of photosynthetic activity", "items": { "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed.
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g.
under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g.
Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_location": { "description": "The location the sequencing run was performed. 
Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of some soil.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_pH": { "description": "pH of some soil.", "type": "number" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. 
identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "water_pH": { "description": "The pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_shared": { "description": "Other users sharing access to the same water source. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "nucl_acid_amp", "assembly_name", "temp", "nucl_acid_ext", "isol_growth_condt", "pathogenicity", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "adapters", "assembly_software", "env_local_scale", "specific_host", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "chem_administration", "food_source", "samp_store_dur", 
"samp_store_loc", "host_age", "host_common_name", "host_genotype", "host_height", "host_length", "host_life_stage", "host_phenotype", "host_taxid", "host_tot_mass", "pcr_primers", "target_gene", "target_subfragment", "mid", "pcr_cond", "chimera_check", "soil_type", "soil_type_meth", "store_cond", "microbial_biomass", "micro_biomass_meth", "sieving", "pool_dna_extracts" ], "title": "MigsBaAgriculture", "type": "object" }, "MigsBaAir": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the Air Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "barometric_press": { "description": "Force per unit area exerted against a surface by the weight of air above that surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_monoxide": { "description": "Carbon monoxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated, i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "oxygen": { "description": "Oxygen (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pollutants": { "description": "Pollutant types and amounts or concentrations measured at the time of sampling; can report multiple pollutants by entering numeric values preceded by name of pollutant", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, 
"rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. 
It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "volatile_org_comp": { "description": "Concentration of carbon-based chemicals that easily evaporate at room temperature; can report multiple volatile organic compounds by entering numeric values preceded by name of compound", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "alt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsBaAir", "type": "object" }, "MigsBaBuiltEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the BuiltEnvironment Extension", "properties": { "abs_air_humidity": { "description": "Actual mass of water vapor - mh20 - present in the air water vapor mixture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "address": { "description": "The street name and building number where the sampling occurred", "type": "string" }, "adj_room": { "description": "List of rooms (room number, room name) immediately adjacent to the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "aero_struc": { "$ref": "#/$defs/AEROSTRUCENUM", "description": "Aerospace structures typically consist of thin plates with stiffeners for the external surfaces, bulkheads and frames to support the shape and fasteners such as welds, rivets, screws and bolts to hold the components together" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amount_light": { "description": "The unit of illuminance and luminous emittance, measuring luminous flux per unit area", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "arch_struc": { "$ref": "#/$defs/ARCHSTRUCENUM", "description": "An architectural structure is a human-made, free-standing, immobile outdoor construction" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_dew_point": { "description": "The average of dew point measures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "avg_temp": { "description": "The average of temperatures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bathroom_count": { "description": "The number of bathrooms in the building", "type": "integer" }, "bedroom_count": { "description": "The number of bedrooms in the building", "type": "integer" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "build_docs": { "$ref": "#/$defs/BUILDDOCSENUM", "description": "The building design, construction and operation documents" }, "build_occup_type": { "description": "The primary function for which a building or discrete part of a building is intended to be used", "items": { "$ref": "#/$defs/BUILDOCCUPTYPEENUM" }, "type": "array" }, "building_setting": { "$ref": "#/$defs/BUILDINGSETTINGENUM", "description": "A location (geography) where a building is set" }, "built_struc_age": { "description": "The age of the built structure since construction", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "built_struc_set": { "$ref": "#/$defs/BUILTSTRUCSETENUM", "description": "The characterization of the location of the built structure as high or low human density" }, "built_struc_type": { "description": "A physical structure that is a body or assemblage of bodies in space to form a system capable of supporting loads", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_area": { "description": "The area of the ceiling space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the ceiling at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "ceil_finish_mat": { "$ref": "#/$defs/CEILFINISHMATENUM", "description": "The type of material used to finish a ceiling" }, "ceil_struc": { "$ref": "#/$defs/CEILSTRUCENUM", "description": "The construction format of the ceiling" }, "ceil_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a ceiling surface" }, "ceil_thermal_mass": { "description": "The ability of the ceiling to provide inertia against temperature fluctuations. Generally this means concrete that is exposed. A metal deck that supports a concrete slab will act thermally as long as it is exposed to room air flow", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_type": { "$ref": "#/$defs/CEILTYPEENUM", "description": "The type of ceiling according to the ceiling's appearance or construction" }, "ceil_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the ceiling" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cool_syst_id": { "description": "The cooling system identifier", "type": "integer" }, "date_last_rain": { "description": "The date of the last time it rained", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dew_point": { "description": "The temperature to which a given parcel of humid air must be cooled, at constant barometric pressure, for water vapor to condense into water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_comp_type": { "$ref": "#/$defs/DOORCOMPTYPEENUM", "description": "The composite type of the door" }, "door_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the door" }, "door_direct": { "$ref": "#/$defs/DOORDIRECTENUM", "description": "The direction the door opens" }, "door_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the door in the room" }, "door_mat": { "$ref": "#/$defs/DOORMATENUM", "description": "The material the door is composed of" }, "door_move": { "$ref": "#/$defs/DOORMOVEENUM", "description": "The type of movement of the door" }, "door_size": { "description": "The size of the door", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_type": { "$ref": "#/$defs/DOORTYPEENUM", "description": "The type of door material" }, "door_type_metal": { "$ref": "#/$defs/DOORTYPEMETALENUM", "description": "The type of metal door" }, "door_type_wood": { "description": "The type of wood door", "type": "string" }, "door_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a door" }, "drawings": { "$ref": "#/$defs/DRAWINGSENUM", "description": "The building's architectural drawings; if design is chosen, indicate phase-conceptual, schematic, design development, and construction documents" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elevator": { "description": "The number of elevators within the built structure", "type": "integer" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "escalator": { "description": "The number of escalators within the built structure", "type": "integer" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "exp_duct": { "description": "The amount of exposed ductwork in the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "exp_pipe": { "description": "The number of exposed pipes in the room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "ext_door": { "description": "The number of exterior doors in the built structure", "type": "integer" }, "ext_wall_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The orientation of the exterior wall" }, "ext_window_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The compass direction the exterior window of the room is facing" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "filter_type": { "description": "A device which removes solid particulates or airborne molecular contaminants", "items": { "$ref": "#/$defs/FILTERTYPEENUM" }, "type": "array" }, "fireplace_type": { "$ref": "#/$defs/FIREPLACETYPEENUM", "description": "A firebox with chimney" }, "floor_age": { "description": "The time period since installment of the carpet or flooring", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_area": { "description": "The area of the floor space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the floor at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "floor_count": { "description": "The number of floors in the building, including basements and mechanical penthouse", "type": "integer" }, "floor_finish_mat": { "description": "The floor covering type; the finished surface that is walked on", "type": "string" }, "floor_struc": { "$ref": "#/$defs/FLOORSTRUCENUM", "description": "Refers to the structural elements and subfloor upon which the finish flooring is installed" }, "floor_thermal_mass": { "description": "The ability of the floor to provide inertia against temperature fluctuations", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_water_mold": { "$ref": "#/$defs/FLOORWATERMOLDENUM", "description": "Signs of the presence of mold or mildew in a room" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. 
Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "freq_cook": { "description": "The number of times a meal is cooked per week", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "furniture": { "$ref": "#/$defs/FURNITUREENUM", "description": "The types of furniture present in the sampled room" }, "gender_restroom": { "$ref": "#/$defs/GENDERRESTROOMENUM", "description": "The gender type of the restroom" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hall_count": { "description": "The total count of hallways and corridors in the built structure", "type": "integer" }, "handidness": { "$ref": "#/$defs/HANDIDNESSENUM", "description": "The handedness of the individual sampled" }, "heat_cool_type": { "description": "Methods of conditioning or heating a room or building", "items": { "$ref": "#/$defs/HEATCOOLTYPEENUM" }, "type": "array" }, "heat_deliv_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The location of heat delivery within the room" }, "heat_sys_deliv_meth": { "$ref": "#/$defs/HEATSYSDELIVMETHENUM", "description": "The method by which the heat is delivered through the system" }, "heat_system_id": { "description": "The heating system identifier", "type": "integer" }, "height_carper_fiber": { "description": "The average carpet fiber height in the indoor environment", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "indoor_space": { "$ref": "#/$defs/INDOORSPACEENUM", "description": "A distinguishable space within a structure, the purpose for which discrete areas of a building is used" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "inside_lux": { "description": "The recorded value at sampling time (power density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "int_wall_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the wall at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "last_clean": { "description": "The last time the floor was cleaned (swept, mopped, vacuumed)", "format": "date-time", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_type": { "description": "Application of light to achieve some practical or aesthetic effect. Lighting includes the use of both artificial light sources such as lamps and light fixtures, as well as natural illumination by capturing daylight. 
Can also include absence of light", "items": { "$ref": "#/$defs/LIGHTTYPEENUM" }, "type": "array" }, "max_occup": { "description": "The maximum amount of people allowed in the indoor environment", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mech_struc": { "$ref": "#/$defs/MECHSTRUCENUM", "description": "mechanical structure: a moving structure" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "number_pets": { "description": "The number of pets residing in the sampled space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_plants": { "description": "The number of plant(s) in the sampling space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_resident": { "description": "The number of individuals currently occupying in the sampling location", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "occup_density_samp": { "description": "Average number of occupants at time of sampling per square footage", "type": "number" }, "occup_document": { "$ref": "#/$defs/OCCUPDOCUMENTENUM", "description": "The type of documentation of occupancy" }, "occup_samp": { "description": "Number of occupants present at time of sample within the given space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "quad_pos": { "$ref": "#/$defs/QUADPOSENUM", "description": "The quadrant position of the sampling room within the building" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_air_humidity": { "description": "Partial vapor and air pressure, density of the vapor and air, or by the actual mass of the vapor and air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "rel_humidity_out": { "description": "The recorded outside relative humidity value at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_samp_loc": { "$ref": "#/$defs/RELSAMPLOCENUM", "description": "The sampling location within the train car" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "room_air_exch_rate": { "description": "The rate at which outside air replaces indoor air in a given space", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_architec_elem": { "description": "The unique details and component parts that, together, form the architecture of a distinguishable space within a built structure", "type": "string" }, "room_condt": { "$ref": "#/$defs/ROOMCONDTENUM", "description": "The condition of the room at the time of sampling" }, "room_connected": { "$ref": "#/$defs/ROOMCONNECTEDENUM", "description": "List of rooms connected to the sampling room by a doorway" }, "room_count": { "description": "The total count of rooms in the built structure including all room types", "type": "integer" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "room_door_dist": { "description": "Distance between doors (meters) in the hallway between the sampling room and adjacent rooms", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_door_share": { "description": "List of room(s) (room number, room name) sharing a door with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_hallway": { "description": "List of room(s) (room number, room name) located in the same hallway as sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_loc": { "$ref": "#/$defs/ROOMLOCENUM", "description": "The 
position of the room within the building" }, "room_moist_dam_hist": { "description": "The history of moisture damage or mold in the past 12 months. Number of events of moisture damage or mold observed", "type": "integer" }, "room_net_area": { "description": "The net floor area of sampling room. Net area excludes wall thicknesses", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_occup": { "description": "Count of room occupancy at time of sampling", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_samp_pos": { "$ref": "#/$defs/ROOMSAMPPOSENUM", "description": "The horizontal sampling position in the room relative to architectural elements" }, "room_type": { "description": "The main purpose or activity of the sampling room. A room is any distinguishable space within a structure", "type": "string" }, "room_vol": { "description": "Volume of sampling room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_wall_share": { "description": "List of room(s) (room number, room name) sharing a wall with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_window_count": { "description": "Number of windows in the room", "type": "integer" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_sort_meth": { "description": "Method by which samples are sorted; open face filter collecting total suspended particles, prefilter to remove particles larger than X micrometers in diameter, where common values of X would be 10 and 2.5 full size sorting in a cascade impactor", "items": { "type": "string" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_time_out": { "description": "The recent and long term history of outside sampling", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_weather": { "$ref": "#/$defs/SAMPWEATHERENUM", "description": "The weather on the sampling day" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_use": { "$ref": "#/$defs/SEASONUSEENUM", "description": "The seasons the space is occupied" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "shad_dev_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the shading device" }, "shading_device_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the shading device at the time of sampling" }, "shading_device_loc": { "$ref": "#/$defs/SHADINGDEVICELOCENUM", "description": "The location of the shading device in relation to the built structure" }, "shading_device_mat": { "description": "The material the shading device is composed of", "type": "string" }, "shading_device_type": { "$ref": "#/$defs/SHADINGDEVICETYPEENUM", "description": "The type of shading device" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "space_typ_state": { "$ref": "#/$defs/SPACETYPSTATEENUM", "description": "Customary or normal state of the space" }, "specific": { "$ref": "#/$defs/SPECIFICENUM", "description": "The building specifications. If design is chosen, indicate phase: conceptual, schematic, design development, construction documents" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "specific_humidity": { "description": "The mass of water vapour in a unit mass of moist air, usually expressed as grams of vapour per kilogram of air, or, in air conditioning, as grains per pound", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "substructure_type": { "description": "The substructure or under building is that largely hidden section of the building which is built off the foundations to the ground floor level", "items": { "$ref": "#/$defs/SUBSTRUCTURETYPEENUM" }, "type": "array" }, "surf_air_cont": { "description": "Contaminant identified on surface", "items": { "$ref": "#/$defs/SURFAIRCONTENUM" }, "type": "array" }, "surf_humidity": { "description": "Surfaces: water activity as a function of air and material moisture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "surf_moisture": { "description": "Water held on a surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "surf_moisture_ph": { "description": "ph measurement of surface", "type": "number" }, "surf_temp": { "description": "Temperature of the surface at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp_out": { "description": "The recorded temperature value at sampling time outside", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "train_line": { "$ref": "#/$defs/TRAINLINEENUM", "description": "The subway line name" }, "train_stat_loc": { "$ref": "#/$defs/TRAINSTATLOCENUM", "description": "The train station collection location" }, "train_stop_loc": { "$ref": "#/$defs/TRAINSTOPLOCENUM", "description": "The train stop collection location" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "typ_occup_density": { "description": "Customary or normal density of occupants", "type": "number" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "vis_media": { "description": "The building visual media", "type": "string" }, "wall_area": { "description": "The total area of the sampled room's walls", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_const_type": { "$ref": "#/$defs/WALLCONSTTYPEENUM", "description": "The building class of the wall defined by the composition of the building elements and fire-resistance rating" }, "wall_finish_mat": { "$ref": "#/$defs/WALLFINISHMATENUM", "description": "The material utilized to finish the outer most layer of the wall" }, "wall_height": { "description": "The average height of the walls in the sampled room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the wall within the room" }, "wall_surf_treatment": { "$ref": "#/$defs/WALLSURFTREATMENTENUM", "description": "The surface treatment of interior wall" }, "wall_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a wall surface" }, "wall_thermal_mass": { "description": "The ability of the wall to provide inertia against temperature fluctuations. Generally this means concrete or concrete block that is either exposed or covered only with paint", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a wall" }, "water_feat_size": { "description": "The size of the water feature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_feat_type": { "$ref": "#/$defs/WATERFEATTYPEENUM", "description": "The type of water feature present within the building being sampled" }, "weekday": { "$ref": "#/$defs/WEEKDAYENUM", "description": "The day of the week when sampling occurred" }, "window_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the window at the time of sampling" }, "window_cover": { "$ref": "#/$defs/WINDOWCOVERENUM", "description": "The type of window covering" }, "window_horiz_pos": { "$ref": "#/$defs/WINDOWHORIZPOSENUM", "description": "The horizontal position of the window on the wall" }, "window_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the window within the room" }, "window_mat": { "$ref": "#/$defs/WINDOWMATENUM", "description": "The type of material used to finish a window" }, "window_open_freq": { "description": "The number of times windows are opened per week", "type": "integer" }, "window_size": { "description": "The window's length and width", "type": "string" }, "window_status": { "$ref": "#/$defs/WINDOWSTATUSENUM", "description": "Defines whether the windows were open or closed during environmental testing" }, "window_type": { "$ref": "#/$defs/WINDOWTYPEENUM", "description": "The type of windows" }, "window_vert_pos": { "$ref": "#/$defs/WINDOWVERTPOSENUM", "description": "The vertical position of the window on the wall" }, "window_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the window" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "surf_material", "rel_air_humidity", "abs_air_humidity", "air_temp", "build_occup_type", "carb_dioxide", 
"ventilation_type", "organism_count", "indoor_space", "indoor_surf", "filter_type", "heat_cool_type", "building_setting", "light_type", "space_typ_state", "typ_occup_density", "occup_samp", "occup_density_samp" ], "title": "MigsBaBuiltEnvironment", "type": "object" }, "MigsBaFoodAnimalAndAnimalFeed": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the FoodAnimalAndAnimalFeed Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "animal_am": { "description": "The name(s) (generic or brand) of the antimicrobial(s) given to the food animal within the last 30 days", "type": "string" }, "animal_am_dur": { "description": "The duration of time (days) that the antimicrobial was administered to the food animal", "type": "string" }, "animal_am_freq": { "description": "The frequency per day that the antimicrobial was administered to the food animal", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "animal_am_route": { "description": "The route by which the antimicrobial is administered into the body of the food animal", "type": "string" }, "animal_am_use": { "description": "The prescribed intended use of or the condition treated by the antimicrobial given to the food animal by any route of administration", "type": "string" }, "animal_body_cond": { "$ref": "#/$defs/ANIMALBODYCONDENUM", "description": "Body condition scoring is a production management tool used to evaluate overall health and nutritional needs of a food animal. Because there are different scoring systems, this field is restricted to three categories" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. 
Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes (|). Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. 
This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_sex": { "$ref": "#/$defs/ANIMALSEXENUM", "description": "The sex and reproductive status of the food animal" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, 
active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). 
Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. 
air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. 
See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. 
This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that are not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. 
This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). 
Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_source_age": { "description": "The age of the food source host organism. Depending on the type of host organism, age may be more appropriate to report in days, weeks, or years", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. 
The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. 
Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. 
Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "food_origin", "food_prod", "food_product_type", "IFSAC_category", "intended_consumer", "samp_purpose" ], "title": "MigsBaFoodAnimalAndAnimalFeed", "type": "object" }, "MigsBaFoodFarmEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the FoodFarmEnvironment Extension", "properties": { "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products). An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "air_flow_impede": { "description": "Presence of objects in the area that would influence or impede air flow through the air filter", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes |Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_intrusion": { "description": "Identification of animals intruding on the sample or sample site including invertebrates (such as pests or pollinators) and vertebrates (such as wildlife or domesticated animals). 
This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "date_extr_weath": { "description": "Date of unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "format": "date-time", "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extr_weather_event": { "description": "Unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "$ref": "#/$defs/EXTRWEATHEREVENTENUM" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. 
ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_san_freq": { "description": "The number of times farm equipment is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually", "type": "string" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fertilizer_date": { "description": "Date of administration of soil amendment or fertilizer. Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "format": "date-time", "type": "string" }, "food_clean_proc": { "$ref": "#/$defs/FOODCLEANPROCENUM", "description": "The process of cleaning food to separate other environmental materials from the food source. Multiple terms can be separated by pipes" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. 
Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. 
This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_medium": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). The name of the medium used to grow the microorganism", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. 
This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. 
The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. 
soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_part_maturity": { "description": "A description of the stage of development of a plant or plant part based on maturity or ripeness. This field accepts terms listed under degree of plant maturity (http://purl.obolibrary.org/obo/FOODON_03530050)", "type": "string" }, "plant_reprod_crop": { "description": "Plant reproductive part used in the field during planting to start the crop", "items": { "$ref": "#/$defs/PLANTREPRODCROPENUM" }, "type": "array" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free.
Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. 
KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing.
It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. 
-80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices.
This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. 
Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of soil at time of sampling.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. 
This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_pH": { "description": "The pH of soil at time of sampling.", "type": "number" }, "soil_porosity": { "description": "Porosity of soil or deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_class": { "$ref": "#/$defs/SOILTEXTURECLASSENUM", "description": "One of the 12 soil texture classes use to describe soil texture based on the relative proportion of different grain sizes of mineral particles [sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um)] in a soil" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) 
should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "water_frequency": { "description": "Number of water delivery events within a given period of time", "type": "string" }, "water_pH": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_adjac": { "description": "Description of the environmental features that are adjacent to the farm water source. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "water_source_shared": { "description": "Other users sharing access to the same water source. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "biotic_regm", "chem_administration", "food_product_type", "samp_type", "IFSAC_category" ], "title": "MigsBaFoodFarmEnvironment", "type": "object" }, "MigsBaFoodFoodProductionFacility": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the FoodFoodProductionFacility Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. 
An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "area_samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. 
For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_occup": { "description": "Daily average occupancy of room. 
Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide_used": { "description": "Substance intended for preventing, neutralizing, destroying, repelling, or mitigating the effects of any pest or microorganism; that inhibits the growth, reproduction, and activity of organisms, including fungal cells; decreases the number of fungi or pests present; deters microbial growth and degradation of other ingredients in the formulation. Indicate the biocide used on the location where the sample was taken. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval.
In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. 
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_monitoring_zone": { "description": "An environmental monitoring zone is a formal designation as part of an environmental monitoring program, in which areas of a food production facility are categorized, commonly as zones 1-4, based on likelihood or risk of foodborne pathogen contamination. This field accepts terms listed under food production environmental monitoring zone (http://purl.obolibrary.org/obo/ENVO). Please add a term to indicate the environmental monitoring zone the sample was taken from", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "facility_type": { "description": "Establishment details about the type of facility where the sample was taken. 
This is independent of the specific product(s) within the facility", "items": { "$ref": "#/$defs/FACILITYTYPEENUM" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. 
This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. 
Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. 
It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). 
Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. 
This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "hygienic_area": { "description": "The subdivision of areas within a food production facility according to hygienic requirements. This field accepts terms listed under hygienic food production area (http://purl.obolibrary.org/obo/ENVO). Please add a term that most accurately indicates the hygienic area your sample was taken from according to the definitions provided", "type": "string" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": "string" }, "type": "array" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free. 
Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_loc_condition": { "$ref": "#/$defs/SAMPLOCCONDITIONENUM", "description": "The condition of the sample location at the time of sampling" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. 
Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. 
If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_surf_moisture": { "description": "Degree of water held on a sampled surface. If present, user can state the degree of water held on surface (intermittent moisture, submerged). If no surface moisture is present indicate not present", "items": { "$ref": "#/$defs/SAMPSURFMOISTUREENUM" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. 
Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. 
Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spec_intended_cons": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "ster_meth_samp_room": { "description": "The method used to sterilize the sampling room. This field accepts terms listed under electromagnetic radiation (http://purl.obolibrary.org/obo/ENVO_01001026). If the proper descriptor is not listed, please use text to describe the sampling room sterilization method. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "samp_source_mat_cat", "samp_type", "samp_stor_media", "samp_stor_device", "food_product_type", "IFSAC_category", "food_product_qual", "food_contact_surf" ], "title": "MigsBaFoodFoodProductionFacility", "type": "object" }, "MigsBaFoodHumanFoods": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the FoodHumanFoods Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products). 
An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. 
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. 
Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_chem_add": { "description": "Any chemicals that are added to the fermentation process to achieve the desired final product", "items": { "type": "string" }, "type": "array" }, "ferm_chem_add_perc": { "description": "The amount of chemical added to the fermentation process", "items": { "type": "number" }, "type": "array" }, "ferm_headspace_oxy": { "description": "The amount of headspace oxygen in a fermentation vessel", "type": "number" }, "ferm_medium": { "description": "The growth medium used for the fermented food fermentation process, which supplies the required nutrients. 
Usually this includes a carbon and nitrogen source, water, micronutrients and chemical additives", "type": "string" }, "ferm_pH": { "description": "The pH of the fermented food fermentation process", "type": "number" }, "ferm_rel_humidity": { "description": "The relative humidity of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_temp": { "description": "The temperature of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_time": { "description": "The time duration of the fermented food fermentation process", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "ferm_vessel": { "description": "The type of vessel used for containment of the fermentation", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. 
This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. 
http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. 
This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). 
Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. 
This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "microb_start": { "description": "Any type of microorganisms used in food production. This field accepts terms listed under live organisms for food production (http://purl.obolibrary.org/obo/FOODON_0344453)", "type": "string" }, "microb_start_count": { "description": "Total cell count of starter culture per gram, volume or area of sample and the method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example : total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "microb_start_inoc": { "description": "The amount of starter culture used to inoculate a new batch", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "microb_start_prep": { "description": "Information about the protocol or method used to prepare the starter inoculum", "type": "string" }, "microb_start_source": { "description": "The source from which the microbial starter culture was sourced. If commercially supplied, list supplier", "type": "string" }, "microb_start_taxID": { "description": "Please include Genus species and strain ID, if known of microorganisms used in food production. 
For complex communities, pipes can be used to separate two or more microbes", "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": "string" }, "type": "array" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. 
Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. 
Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "food_product_type", "IFSAC_category" ], "title": "MigsBaFoodHumanFoods", "type": "object" }, "MigsBaHostAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the HostAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. 
A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "blood_press_diast": { "description": "Resting diastolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "blood_press_syst": { "description": "Resting systolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g.
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. 
The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of 
replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g.
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique.
Use of a globally unique identifier for the field source_mat_id is recommended in addition to samp_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsBaHostAssociated", "type": "object" }, "MigsBaHumanAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the HumanAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amniotic_fluid_color": { "description": "Specification of the color of the amniotic fluid sample", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. 
For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "blood_blood_disord": { "description": "History of blood disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, hematopoietic system disease (https://disease-ontology.org/?id=DOID:74)", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diet_last_six_month": { "description": "Specification of major diet changes in the last six months, if yes the change should be specified", "type": "string" }, "drug_usage": { "description": "Any drug used by subject and the frequency of usage; can include multiple drugs used", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428.
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. 
https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "foetal_health_stat": { "description": "Specification of foetal health status, should also include abortion", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gestation_state": { "description": "Specification of the gestation state", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_hiv_stat": { "description": "HIV status of subject, if yes HAART initiation status should also be indicated as [YES or NO]", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "kidney_disord": { "description": "History of kidney disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, kidney disease (https://disease-ontology.org/?id=DOID:557)", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "maternal_health_stat": { "description": "Specification of the maternal health status", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_throat_disord": { "description": "History of nose-throat disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850), upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", 
"items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pet_farm_animal": { "description": "Specification of presence of pets or farm animals in the environment of subject, if yes the animals should be specified; can include multiple animals present", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "pulmonary_disord": { "description": "History of pulmonary disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850)", "items": { "type": "string" }, "type": "array" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. 
While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "smoker": { "description": "Specification of smoking status", "type": "boolean" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "study_complt_stat": { "description": "Specification of study completion status, if no the reason should be specified", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "travel_out_six_month": { "description": "Specification of the countries travelled in the last six months; can include multiple travels", "items": { "type": "string" }, "type": "array" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" }, "twin_sibling": { "description": "Specification of twin sibling presence", "type": "boolean" }, "urine_collect_meth": { "$ref": "#/$defs/URINECOLLECTMETHENUM", "description": "Specification of urine collection method" }, "urogenit_tract_disor": { "description": "History of urogenital tract disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" }, "weight_loss_3_month": { "description": "Specification of weight loss in the last three months, if yes should be further specified to include amount of weight loss", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsBaHumanAssociated", "type": "object" }, "MigsBaHumanGut": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the HumanGut Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. 
https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gastrointest_disord": { "description": "History of gastrointestinal tract disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, gastrointestinal system disease (https://disease-ontology.org/?id=DOID:77)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. 
Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses.
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "liver_disord": { "description": "History of liver disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, liver disease (https://disease-ontology.org/?id=DOID:409)", "items": { "type": "string" }, "type": "array" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type":
"string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. 
It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction.
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "special_diet": { "description": "Specification of special diet; can include multiple special diets", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsBaHumanGut", "type": "object" }, "MigsBaHumanOral": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the HumanOral Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. 
High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. 
Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g.
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_mouth_teeth_throat_disord": { "description": "History of nose/mouth/teeth/throat disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, nose disease (https://disease-ontology.org/?id=DOID:2825), mouth disease (https://disease-ontology.org/?id=DOID:403), tooth disease (https://disease-ontology.org/?id=DOID:1091), or upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. 
This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_last_toothbrush": { "description": "Specification of the time since last toothbrushing", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsBaHumanOral", "type": "object" }, "MigsBaHumanSkin": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the HumanSkin Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. 
High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. 
Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dermatology_disord": { "description": "History of dermatology disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, skin disease (https://disease-ontology.org/?id=DOID:37)", "items": { "type": "string" }, "type": "array" }, "dominant_hand": { "$ref": "#/$defs/DOMINANTHANDENUM", "description": "Dominant hand of the subject" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. 
UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. 
ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). 
Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, 
electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_since_last_wash": { "description": "Specification of the time since last wash", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsBaHumanSkin", "type": "object" }, "MigsBaHumanVaginal": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the HumanVaginal Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. 
High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "birth_control": { "description": "Specification of birth control medication used", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "douche": { "description": "Date of most recent douche", "format": "date-time", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gynecologic_disord": { "description": "History of gynecological disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, female reproductive system disease (https://disease-ontology.org/?id=DOID:229)", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. 
Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hrt": { "description": "Whether subject had hormone replacement therapy, and if yes start date", "format": "date-time", "type": "string" }, "hysterectomy": { "description": "Specification of whether hysterectomy was performed", "type": "boolean" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "menarche": { "description": "Date of most recent menstruation", "format": "date-time", "type": "string" }, "menopause": { "description": 
"Date of onset of menopause", "format": "date-time", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pregnancy": { "description": "Date due of pregnancy", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. 
More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sexual_act": { "description": "Current sexual partner and frequency of sex", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "urogenit_disord": { "description": "History of urogenital disorders, can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, reproductive system disease (https://disease-ontology.org/?id=DOID:15) or urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsBaHumanVaginal", "type": "object" }, "MigsBaHydrocarbonResourcesCores": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the HydrocarbonResourcesCores Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. Mic)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g.
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. 
(Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "owc_tvdss": { "description": "Depth of the original oil water contact (OWC) zone (average) (m TVDSS)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "permeability": { "description": "Measure of the ability of a hydrocarbon resource to allow fluids to pass through it. (Additional information: https://en.wikipedia.org/wiki/Permeability_(earth_sciences))", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results.
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_md": { "description": "In non deviated well, measured depth is equal to the true vertical depth, TVD (TVD=TVDSS plus the reference or datum it refers to). 
In deviated wells, the MD is the length of trajectory of the borehole measured from the same reference or datum. Common datums used are ground level (GL), drilling rig floor (DF), rotary table (RT), kelly bushing (KB) and mean sea level (MSL). If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_tvdss": { "description": "Depth of the sample i.e. the vertical distance between the sea level and the sampled position in the subsurface. Depth can be reported as an interval for subsurface samples e.g. 1325.75-1362.25 m", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction.
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "sr_dep_env": { "$ref": "#/$defs/SRDEPENVENUM", "description": "Source rock depositional environment (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of source rock (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_kerog_type": { "$ref": "#/$defs/SRKEROGTYPEENUM", "description": "Origin of kerogen. 
Type I: Algal (aquatic), Type II: planktonic and soft plant material (aquatic or terrestrial), Type III: terrestrial woody/ fibrous plant material (terrestrial), Type IV: oxidized recycled woody debris (terrestrial) (additional information: https://en.wikipedia.org/wiki/Kerogen). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_lithology": { "$ref": "#/$defs/SRLITHOLOGYENUM", "description": "Lithology of source rock (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. 
Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "hcr", "hc_produced", "basin", "hcr_temp", "sulfate_fw", "vfa_fw", "samp_type", "api" ], "title": "MigsBaHydrocarbonResourcesCores", "type": "object" }, "MigsBaHydrocarbonResourcesFluidsSwabs": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the HydrocarbonResourcesFluidsSwabs Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "add_recov_method": { "description": "Additional (i.e. Secondary, tertiary, etc.) recovery methods deployed for increase of hydrocarbon recovery from resource and start date for each one of them. If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. 
For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide": { "description": "List of biocides (commercial name of product and supplier) and date of administration", "type": "string" }, "biocide_admin_method": { "description": "Method of biocide administration (dose, frequency, duration, time elapsed between last biociding and sampling) (e.g. 150 mg/l; weekly; 4 hr; 3 days)", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_treat_method": { "description": "Method of chemical administration(dose, frequency, duration, time elapsed between administration and sampling) (e.g. 50 mg/l; twice a week; 1 hr; 0 days)", "type": "string" }, "chem_treatment": { "description": "List of chemical compounds administered upstream the sampling location where sampling occurred (e.g. Glycols, H2S scavenger, corrosion and scale inhibitors, demulsifiers, and other production chemicals etc.). The commercial name of the product and name of the supplier should be provided. The date of administration should also be included", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. 
In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. Mic)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "iw_bt_date_well": { "description": "Injection water breakthrough date per well following a secondary and/or tertiary recovery", "format": "date-time", "type": "string" }, "iwf": { "description": "Proportion of the produced fluids derived from injected water at the time of sampling. (e.g. 87%)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. 
(Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_rate": { "description": "Oil and/or gas production rates per well (e.g. 524 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_start_date": { "description": "Date of field's first production", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_point": { "$ref": "#/$defs/SAMPCOLLECTPOINTENUM", "description": "Sampling point on the asset where the sample was collected (e.g. Wellhead, storage tank, separator, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_loc_corr_rate": { "description": "Metal corrosion rate is the speed of metal deterioration due to environmental conditions. As environmental conditions change corrosion rates change accordingly. 
Therefore, long term corrosion rates are generally more informative than short term rates and for that reason they are preferred during reporting. In the case of suspected MIC, corrosion rate measurements at the time of sampling might provide insights into the involvement of certain microbial community members in MIC as well as potential microbial interplays", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_preserv": { "description": "Preservative added to the sample (e.g. RNAlater, alcohol, formaldehyde, etc.). Where appropriate include volume added (e.g. RNAlater; 2 ml)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. 
This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "water_cut": { "description": "Current amount of water (%) in a produced fluid stream; or the average of the combined streams", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_prod_rate": { "description": "Water production rates per well (e.g. 987 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. 
(Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "hcr", "hc_produced", "basin", "water_cut", "iwf", "add_recov_method", "samp_type", "samp_collect_point", "sulfate", "sulfide", "nitrate", "api" ], "title": "MigsBaHydrocarbonResourcesFluidsSwabs", "type": "object" }, "MigsBaMicrobialMatBiofilm": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the MicrobialMatBiofilm Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. 
It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. 
ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsBaMicrobialMatBiofilm", "type": "object" }, "MigsBaMiscellaneousNaturalOrArtificialEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the MiscellaneousNaturalOrArtificialEnvironment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. 
Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as: total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used 
as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to samp_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degrees Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsBaMiscellaneousNaturalOrArtificialEnvironment", "type": "object" }, "MigsBaPlantAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the PlantAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp_regm": { "description": "Information about treatment involving an exposure to varying temperatures; should include the temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include different temperature regimens", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "antibiotic_regm": { "description": "Information about treatment involving antibiotic administration; should include the name of antibiotic, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple antibiotic regimens", "items": { "type": "string" }, "type": "array" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_mutagen": { "description": "Treatment involving use of mutagens; should include the name of mutagen, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mutagen regimens", "items": { "type": "string" }, "type": "array" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cult_root_med": { "description": "Name or reference for the hydroponic or in vitro culture rooting medium; can be the name of a commonly used medium or reference to a specific medium, e.g. Murashige and Skoog medium. 
If the medium has not been formally published, use the rooting medium descriptors", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. 
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_regm": { "description": "Information about treatment involving the use of fertilizers; should include the name of fertilizer, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fertilizer regimens", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravity": { "description": "Information about treatment involving use of gravity factor to study various types of responses in presence, absence or modified levels of gravity; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple treatments", "items": { "type": "string" }, "type": "array" }, "growth_facil": { "description": "Type of facility where the sampled plant was grown; controlled vocabulary: growth chamber, open top chamber, glasshouse, experimental garden, field. Alternatively use Crop Ontology (CO) terms, see http://www.cropontology.org/ontology/CO_715/Crop%20Research", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_hormone_regm": { "description": "Information about treatment involving use of growth hormones; should include the name of growth hormone, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple growth hormone regimens", "items": { "type": "string" }, "type": "array" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", 
"items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_wet_mass": { "description": "Measurement of wet mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity_regm": { "description": "Information about treatment involving an exposure to varying degree of humidity; information about treatment involving use of growth hormones; should include amount of humidity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_regm": { "description": "Information about treatment(s) involving exposure to light, including both light intensity and quality", "type": "string" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "mineral_nutr_regm": { "description": "Information about treatment involving the use of mineral supplements; should include the name of mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of 
plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of ph of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimen", "items": { "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. 
Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_sex": { "$ref": "#/$defs/PLANTSEXENUM", "description": "Sex of the reproductive parts on the whole plant, e.g. pistillate, staminate, monoecious, hermaphrodite" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "radiation_regm": { "description": "Information about treatment involving exposure of plant or a plant part to a particular radiation regimen; should include the radiation type, amount or intensity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple radiation regimens", "items": { "type": "string" }, "type": "array" }, "rainfall_regm": { "description": "Information about treatment involving an exposure to a given amount of rainfall, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type":
"array" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N, P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "root_med_regl": { "description": "Growth regulators in the culture rooting medium such as cytokinins, auxins, gibberellins, abscisic acid; e.g. 0.5 mg/L NAA", "type": "string" }, "root_med_solid": { "description": "Specification of the solidifying agent in the culture rooting medium; e.g.
agar", "type": "string" }, "root_med_suppl": { "description": "Organic supplements of the culture rooting medium, such as vitamins, amino acids, organic acids, antibiotics, activated charcoal; e.g. nicotinic acid (0.5 mg/L)", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO).
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tiss_cult_growth_med": { "description": "Description of plant tissue culture growth media used", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsBaPlantAssociated", "type": "object" }, "MigsBaSediment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the Sediment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as: total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428.
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner.
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "particle_class": { "description": "Particles are classified, based on their size, into six general categories:clay, silt, sand, gravel, cobbles, and boulders; should include amount of particle preceded by the name of the particle type; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sediment_type": { "$ref": "#/$defs/SEDIMENTTYPEENUM", "description": "Information about the sediment type based on major constituents" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsBaSediment", "type": "object" }, "MigsBaSoil": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the Soil Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "al_sat": { "description": "Aluminum saturation (esp. For tropical soils)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "al_sat_meth": { "description": "Reference or method used in determining Al saturation", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling >= 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. 
The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals": { "description": "Heavy metals present in the sequenced sample and their concentrations. For multiple heavy metals and concentrations, add multiple copies of this field", "items": { "type": "string" }, "type": "array" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "link_addit_analys": { "description": "Link to additional analysis results performed on the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", 
"type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", 
"pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "water_cont_soil_meth": { "description": "Reference or method used in determining the water content of soil", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "elev", "env_broad_scale" ], "title": "MigsBaSoil", "type": "object" }, "MigsBaSymbiontAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the SymbiontAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "association_duration": { "description": "Time spent in host of the symbiotic organism at the time of sampling; relevant scale depends on symbiotic organism and study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_cellular_loc": { "$ref": "#/$defs/HOSTCELLULARLOCENUM", "description": "The localization of the symbiotic host organism within the host from which it was sampled: e.g. 
intracellular if the symbiotic host organism is localized within the cells or extracellular if the symbiotic host organism is localized outside of cells" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_dependence": { "$ref": "#/$defs/HOSTDEPENDENCEENUM", "description": "Type of host dependence for the symbiotic host organism to its host" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_infra_spec_name": { "description": "Taxonomic information about the host below subspecies level", "type": "string" }, "host_infra_spec_rank": { "description": "Taxonomic rank information about the host below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_number": { "description": "Number of symbiotic host individuals pooled at the time of collection", "type": "string" }, "host_of_host_coinf": { "description": "The taxonomic name of any coinfecting organism observed in a symbiotic relationship with the host of the sampled host organism. e.g. where a sample collected from a host trematode species (A) which was collected from a host_of_host fish (B) that was also infected with a nematode (C), the value here would be (C) the nematode {species name} or {common name}. Multiple co-infecting species may be added in a comma-separated list. For listing symbiotic organisms associated with the host (A) use the term Observed host symbiont", "type": "string" }, "host_of_host_disease": { "description": "List of diseases with which the host of the symbiotic host organism has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_loc": { "description": "For a symbiotic host organism the local anatomical environment within its host may have causal influences. 
Report the anatomical entity(s) which are in the direct environment of the symbiotic host organism being sampled and which you believe have significant causal influences on your sample or specimen. For example, if the symbiotic host organism being sampled is an intestinal worm, its local environmental context will be the term for intestine from UBERON (http://uberon.github.io/)", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_med": { "description": "Report the environmental material(s) immediately surrounding the symbiotic host organism at the time of sampling. This usually will be a tissue or substance type from the host, but may be another material if the symbiont is external to the host. We recommend using classes from the UBERON ontology, but subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483) may also be used. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
intestines, heart)", "type": "string" }, "host_of_host_fam_rel": { "description": "Familial relationship of the host of the symbiotic host organisms to other hosts of symbiotic host organism in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_of_host_geno": { "description": "Observed genotype of the host of the symbiotic host organism", "type": "string" }, "host_of_host_gravid": { "description": "Whether or not the host of the symbiotic host organism is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_of_host_infname": { "description": "Taxonomic name information of the host of the symbiotic host organism below subspecies level", "type": "string" }, "host_of_host_infrank": { "description": "Taxonomic rank information about the host of the symbiotic host organism below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_of_host_name": { "description": "Common name of the host of the symbiotic host organism", "type": "string" }, "host_of_host_pheno": { "description": "Phenotype of the host of the symbiotic host organism. For phenotypic quality ontology (PATO) terms, see http://purl.bioontology.org/ontology/pato", "type": "string" }, "host_of_host_sub_id": { "description": "A unique identifier by which each host of the symbiotic host organism subject can be referred to, de-identified, e.g. #H14", "type": "string" }, "host_of_host_taxid": { "description": "NCBI taxon id of the host of the symbiotic host organism", "type": "string" }, "host_of_host_totmass": { "description": "Total mass of the host of the symbiotic host organism at collection, the unit depends on the host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_specificity": { "$ref": "#/$defs/HOSTSPECIFICITYENUM", "description": "Level of specificity of symbiont-host interaction: e.g. generalist (symbiont able to establish associations with distantly related hosts) or species-specific" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "mode_transmission": { "$ref": "#/$defs/MODETRANSMISSIONENUM", "description": "The process through which the symbiotic host organism entered the host from which it was sampled" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that 
describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "route_transmission": { "$ref": "#/$defs/ROUTETRANSMISSIONENUM", "description": "Description of path taken by the symbiotic host organism being sampled in order to establish a symbiotic relationship with the host (with which it was observed at the time of sampling) via a mode of transmission (specified in mode_transmission)" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_sol": { "description": "Solution within which sample was stored, if any", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sym_life_cycle_type": { "$ref": "#/$defs/SYMLIFECYCLETYPEENUM", "description": "Type of life cycle of the symbiotic host species (the thing being sampled). Simple life cycles occur within a single host, complex ones within multiple different hosts over the course of their normal life cycle" }, "symbiont_host_role": { "$ref": "#/$defs/SYMBIONTHOSTROLEENUM", "description": "Role of the host in the life cycle of the symbiotic organism" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" }, "type_of_symbiosis": { "$ref": "#/$defs/TYPEOFSYMBIOSISENUM", "description": "Type of biological interaction established between the symbiotic host organism being sampled and its respective host" }, "urobiom_sex": { "$ref": "#/$defs/UROBIOMSEXENUM", "description": "Physical sex of the host" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "host_dependence", "sym_life_cycle_type", "host_life_stage" ], "title": "MigsBaSymbiontAssociated", "type": "object" }, "MigsBaWastewaterSludge": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the WastewaterSludge Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biochem_oxygen_dem": { "description": "Amount of dissolved oxygen needed by aerobic biological organisms in a body of water to break down organic material present in a given water sample at certain temperature over a specific time period", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_oxygen_dem": { "description": "A measure of the capacity of water to consume oxygen during the decomposition of organic matter and the oxidation of inorganic chemicals such as ammonia and nitrite", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "efficiency_percent": { "description": "Percentage of volatile solids removed from the anaerobic digestor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "emulsions": { "description": "Amount or concentration of substances such as paints, adhesives, mayonnaise, hair colorants, emulsified oils, etc.; can include multiple emulsion types", "items": { "type": "string" }, "type": "array" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gaseous_substances": { "description": "Amount or concentration of substances such as hydrogen sulfide, carbon dioxide, methane, etc.; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "indust_eff_percent": { "description": "Percentage of industrial effluents received by wastewater treatment plant", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "inorg_particles": { "description": "Concentration of particles such as sand, grit, metal particles, ceramics, etc.; can include multiple particles", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_particles": { "description": "Concentration of particles such as faeces, hairs, food, vomit, paper fibers, plant material, humus, etc", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pre_treatment": { "description": "The process of pre-treatment removes materials that can be easily collected from the raw wastewater", "type": "string" }, "primary_treatment": { "description": "The process to produce both a generally homogeneous liquid capable of being treated biologically and a sludge that can be separately treated or processed", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reactor_type": { "description": "Anaerobic digesters can be designed and engineered to operate using a number of different process configurations, as batch or continuous, mesophilic, high solid or low solid, and single stage or multistage", "type": "string" }, 
"ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "secondary_treatment": { "description": "The process for substantially degrading the biological content of the sewage", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sewage_type": { "description": "Type of wastewater treatment plant as municipal or industrial", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sludge_retent_time": { "description": "The time activated sludge remains in reactor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_inorg_mat": { "description": "Concentration of substances such as ammonia, road-salt, sea-salt, cyanide, hydrogen sulfide, thiocyanates, thiosulfates, etc", "items": { "type": "string" }, "type": "array" }, "soluble_org_mat": { "description": "Concentration of substances such as urea, fruit sugars, soluble proteins, drugs, pharmaceuticals, etc", "items": { "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tertiary_treatment": { "description": "The process providing a final treatment stage to raise the effluent quality before it is discharged to the receiving environment", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" }, "wastewater_type": { "description": "The origin of wastewater such as human waste, rainfall, storm drains, etc", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsBaWastewaterSludge", "type": "object" }, "MigsBaWater": { "additionalProperties": false, "description": "MIxS data that complies with the MigsBa checklist and the Water Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "atmospheric_data": { "description": "Measurement of atmospheric data; can include multiple data", "items": { "type": "string" }, "type": "array" }, "bac_prod": { "description": "Bacterial production in the water column measured by isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bac_resp": { "description": "Measurement of bacterial respiration in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. 
Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_nitro": { "description": "Concentration of dissolved inorganic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "down_par": { "description": "Visible waveband radiance and irradiance measurements in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. 
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. 
Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fluor": { "description": "Raw or converted fluorescence of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_intensity": { "description": "Measurement of light intensity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "part_org_nitro": { "description": "Concentration of particulate organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "photon_flux": { "description": "Measurement of photon flux", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "primary_prod": { "description": "Measurement of primary production, generally measured as isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_react_phosp": { "description": "Concentration of soluble reactive phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_part_matter": { "description": "Concentration of suspended particulate matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_diss_nitro": { "description": "Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_inorg_nitro": { "description": "Total inorganic nitrogen content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_part_carb": { "description": "Total particulate carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "num_replicons", "assembly_qual", "ref_biomaterial", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsBaWater", "type": "object" }, "MigsEuAgriculture": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the Agriculture Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. 
A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. 
This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). 
Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). 
If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; information about treatment involving use of growth hormones; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer 
or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of ph of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimen", "items": { "type": "string" }, "type": "array" }, "photosynt_activ": { "description": "Measurement of photosynthetic activity (i.e. leaf gas exchange / chlorophyll fluorescence emissions / reflectance / transpiration) Please also include the term method term detailing the method of activity measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "photosynt_activ_meth": { "description": "Reference or method used in measurement of photosynthetic activity", "items": { "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). 
For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. 
Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_location": { "description": "The location the sequencing run was performed. 
Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of some soil.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_pH": { "description": "pH of some soil.", "type": "number" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. 
identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "water_pH": { "description": "The pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_shared": { "description": "Other users sharing access to the same water source. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "nucl_acid_amp", "assembly_name", "temp", "nucl_acid_ext", "isol_growth_condt", "pathogenicity", "number_contig", "assembly_qual", "project_name", "adapters", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "chem_administration", "food_source", "samp_store_dur", "samp_store_loc", "host_age", "host_common_name", 
"host_genotype", "host_height", "host_length", "host_life_stage", "host_phenotype", "host_taxid", "host_tot_mass", "pcr_primers", "target_gene", "target_subfragment", "mid", "pcr_cond", "chimera_check", "soil_type", "soil_type_meth", "store_cond", "microbial_biomass", "micro_biomass_meth", "sieving", "pool_dna_extracts" ], "title": "MigsEuAgriculture", "type": "object" }, "MigsEuAir": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the Air Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "barometric_press": { "description": "Force per unit area exerted against a surface by the weight of air above that surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_monoxide": { "description": "Carbon monoxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g.
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided.
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "oxygen": { "description": "Oxygen (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). 
For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pollutants": { "description": "Pollutant types and, amount or concentrations measured at the time of sampling; can report multiple pollutants by entering numeric values preceded by name of pollutant", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field is specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "volatile_org_comp": { "description": "Concentration of carbon-based chemicals that easily evaporate at room temperature; can report multiple volatile organic compounds by entering numeric values preceded by name of compound", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "alt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsEuAir", "type": "object" }, "MigsEuBuiltEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the BuiltEnvironment Extension", "properties": { "abs_air_humidity": { "description": "Actual mass of water vapor - mh20 - present in the air water vapor mixture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "address": { "description": "The street name and building number where the sampling occurred", "type": "string" }, "adj_room": { "description": "List of rooms (room number, room name) immediately adjacent to the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "aero_struc": { "$ref": "#/$defs/AEROSTRUCENUM", "description": "Aerospace structures typically consist of thin plates with stiffeners for the external surfaces, bulkheads and frames to support the shape and fasteners such as welds, rivets, screws and bolts to hold the components together" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amount_light": { "description": "The unit of illuminance and luminous emittance, measuring luminous flux per unit area", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "arch_struc": { "$ref": "#/$defs/ARCHSTRUCENUM", "description": "An architectural structure is a human-made, free-standing, immobile outdoor construction" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_dew_point": { "description": "The average of dew point measures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "avg_temp": { "description": "The average of temperatures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bathroom_count": { "description": "The number of bathrooms in the building", "type": "integer" }, "bedroom_count": { "description": "The number of bedrooms in the building", "type": "integer" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "build_docs": { "$ref": "#/$defs/BUILDDOCSENUM", "description": "The building design, construction and operation documents" }, "build_occup_type": { "description": "The primary function for which a building or discrete part of a building is intended to be used", "items": { "$ref": "#/$defs/BUILDOCCUPTYPEENUM" }, "type": "array" }, "building_setting": { "$ref": "#/$defs/BUILDINGSETTINGENUM", "description": "A location (geography) where a building is set" }, "built_struc_age": { "description": "The age of the built structure since construction", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "built_struc_set": { "$ref": "#/$defs/BUILTSTRUCSETENUM", "description": "The characterization of the location of the built structure as high or low human density" }, "built_struc_type": { "description": "A physical structure that is a body or assemblage of bodies in space to form a system capable of supporting loads", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_area": { "description": "The area of the ceiling space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the ceiling at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "ceil_finish_mat": { "$ref": "#/$defs/CEILFINISHMATENUM", "description": "The type of material used to finish a ceiling" }, "ceil_struc": { "$ref": "#/$defs/CEILSTRUCENUM", "description": "The construction format of the ceiling" }, "ceil_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a ceiling surface" }, "ceil_thermal_mass": { "description": "The ability of the ceiling to provide inertia against temperature fluctuations. Generally this means concrete that is exposed. A metal deck that supports a concrete slab will act thermally as long as it is exposed to room air flow", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_type": { "$ref": "#/$defs/CEILTYPEENUM", "description": "The type of ceiling according to the ceiling's appearance or construction" }, "ceil_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the ceiling" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "cool_syst_id": { "description": "The cooling system identifier", "type": "integer" }, "date_last_rain": { "description": "The date of the last time it rained", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dew_point": { "description": "The temperature to which a given parcel of humid air must be cooled, at constant barometric pressure, for water vapor to condense into water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_comp_type": { "$ref": "#/$defs/DOORCOMPTYPEENUM", "description": "The composite type of the door" }, "door_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the door" }, "door_direct": { "$ref": "#/$defs/DOORDIRECTENUM", "description": "The direction the door opens" }, "door_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the door in the room" }, "door_mat": { "$ref": "#/$defs/DOORMATENUM", "description": "The material the door is composed of" }, "door_move": { "$ref": "#/$defs/DOORMOVEENUM", "description": "The type of movement of the door" }, "door_size": { "description": "The size of the door", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_type": { "$ref": "#/$defs/DOORTYPEENUM", "description": "The type of door material" }, "door_type_metal": { "$ref": "#/$defs/DOORTYPEMETALENUM", "description": "The type of metal door" }, "door_type_wood": { "description": "The type of wood door", "type": "string" }, "door_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a door" }, "drawings": { "$ref": "#/$defs/DRAWINGSENUM", "description": "The building's architectural drawings; if design is chosen, indicate phase-conceptual, schematic, design development, and construction documents" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elevator": { "description": "The number of elevators within the built structure", "type": "integer" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "escalator": { "description": "The number of escalators within the built structure", "type": "integer" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "exp_duct": { "description": "The amount of exposed ductwork in the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "exp_pipe": { "description": "The number of exposed pipes in the room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "ext_door": { "description": "The number of exterior doors in the built structure", "type": "integer" }, "ext_wall_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The orientation of the exterior wall" }, "ext_window_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The compass direction the exterior window of the room is facing" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "filter_type": { "description": "A device which removes solid particulates or airborne molecular contaminants", "items": { "$ref": "#/$defs/FILTERTYPEENUM" }, "type": "array" }, "fireplace_type": { "$ref": "#/$defs/FIREPLACETYPEENUM", "description": "A firebox with chimney" }, "floor_age": { "description": "The time period since installment of the carpet or flooring", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_area": { "description": "The area of the floor space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the floor at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "floor_count": { "description": "The number of floors in the building, including basements and mechanical penthouse", "type": "integer" }, "floor_finish_mat": { "description": "The floor covering type; the finished surface that is walked on", "type": "string" }, "floor_struc": { "$ref": "#/$defs/FLOORSTRUCENUM", "description": "Refers to the structural elements and subfloor upon which the finish flooring is installed" }, "floor_thermal_mass": { "description": "The ability of the floor to provide inertia against temperature fluctuations", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_water_mold": { "$ref": "#/$defs/FLOORWATERMOLDENUM", "description": "Signs of the presence of mold or mildew in a room" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. 
Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "freq_cook": { "description": "The number of times a meal is cooked per week", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "furniture": { "$ref": "#/$defs/FURNITUREENUM", "description": "The types of furniture present in the sampled room" }, "gender_restroom": { "$ref": "#/$defs/GENDERRESTROOMENUM", "description": "The gender type of the restroom" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hall_count": { "description": "The total count of hallways and corridors in the built structure", "type": "integer" }, "handidness": { "$ref": "#/$defs/HANDIDNESSENUM", "description": "The handedness of the individual sampled" }, "heat_cool_type": { "description": "Methods of conditioning or heating a room or building", "items": { "$ref": "#/$defs/HEATCOOLTYPEENUM" }, "type": "array" }, "heat_deliv_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The location of heat delivery within the room" }, "heat_sys_deliv_meth": { "$ref": "#/$defs/HEATSYSDELIVMETHENUM", "description": "The method by which the heat is delivered through the system" }, "heat_system_id": { "description": "The heating system identifier", "type": "integer" }, "height_carper_fiber": { "description": "The average carpet fiber height in the indoor environment", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "indoor_space": { "$ref": "#/$defs/INDOORSPACEENUM", "description": "A distinguishable space within a structure, the purpose for which discrete areas of a building is used" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "inside_lux": { "description": "The recorded value at sampling time (power density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "int_wall_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the wall at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "last_clean": { "description": "The last time the floor was cleaned (swept, mopped, vacuumed)", "format": "date-time", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_type": { "description": "Application of light to achieve some practical or aesthetic effect. Lighting includes the use of both artificial light sources such as lamps and light fixtures, as well as natural illumination by capturing daylight. 
Can also include absence of light", "items": { "$ref": "#/$defs/LIGHTTYPEENUM" }, "type": "array" }, "max_occup": { "description": "The maximum amount of people allowed in the indoor environment", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mech_struc": { "$ref": "#/$defs/MECHSTRUCENUM", "description": "mechanical structure: a moving structure" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "number_pets": { "description": "The number of pets residing in the sampled space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_plants": { "description": "The number of plant(s) in the sampling space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_resident": { "description": "The number of individuals currently occupying in the sampling location", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "occup_density_samp": { "description": "Average number of occupants at time of sampling per square footage", "type": "number" }, "occup_document": { "$ref": "#/$defs/OCCUPDOCUMENTENUM", "description": "The type of documentation of occupancy" }, "occup_samp": { "description": "Number of occupants present at time of sample within the given space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). 
For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "quad_pos": { "$ref": "#/$defs/QUADPOSENUM", "description": "The quadrant position of the sampling room within the building" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_air_humidity": { "description": "Partial vapor and air pressure, density of the vapor and air, or by the actual mass of the vapor and air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "rel_humidity_out": { "description": "The recorded outside relative humidity value at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_samp_loc": { "$ref": "#/$defs/RELSAMPLOCENUM", "description": "The sampling location within the train car" }, "room_air_exch_rate": { "description": "The rate at which outside air replaces indoor air in a given space", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_architec_elem": { "description": "The unique details and component parts that, together, form the architecture of a distinguishable space within a built structure", "type": "string" }, "room_condt": { "$ref": "#/$defs/ROOMCONDTENUM", "description": "The condition of the room at the time of sampling" }, "room_connected": { "$ref": "#/$defs/ROOMCONNECTEDENUM", "description": "List of rooms connected to the sampling room by a doorway" }, "room_count": { "description": "The total count of rooms in the built structure including all room types", "type": "integer" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "room_door_dist": { "description": "Distance between doors (meters) in the hallway between the sampling room and adjacent rooms", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_door_share": { "description": "List of room(s) (room number, room name) sharing a door with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_hallway": { "description": "List of room(s) (room number, room name) located in the same hallway as sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" 
}, "room_loc": { "$ref": "#/$defs/ROOMLOCENUM", "description": "The position of the room within the building" }, "room_moist_dam_hist": { "description": "The history of moisture damage or mold in the past 12 months. Number of events of moisture damage or mold observed", "type": "integer" }, "room_net_area": { "description": "The net floor area of sampling room. Net area excludes wall thicknesses", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_occup": { "description": "Count of room occupancy at time of sampling", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_samp_pos": { "$ref": "#/$defs/ROOMSAMPPOSENUM", "description": "The horizontal sampling position in the room relative to architectural elements" }, "room_type": { "description": "The main purpose or activity of the sampling room. A room is any distinguishable space within a structure", "type": "string" }, "room_vol": { "description": "Volume of sampling room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_wall_share": { "description": "List of room(s) (room number, room name) sharing a wall with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_window_count": { "description": "Number of windows in the room", "type": "integer" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_sort_meth": { "description": "Method by which samples are sorted; open face filter collecting total suspended particles, prefilter to remove particles larger than X micrometers in diameter, where common values of X would be 10 and 2.5 full size sorting in a cascade impactor", "items": { "type": "string" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_time_out": { "description": "The recent and long term history of outside sampling", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_weather": { "$ref": "#/$defs/SAMPWEATHERENUM", "description": "The weather on the sampling day" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_use": { "$ref": "#/$defs/SEASONUSEENUM", "description": "The seasons the space is occupied" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "shad_dev_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the shading device" }, "shading_device_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the shading device at the time of sampling" }, "shading_device_loc": { "$ref": "#/$defs/SHADINGDEVICELOCENUM", "description": "The location of the shading device in relation to the built structure" }, "shading_device_mat": { "description": "The material the shading device is composed of", "type": "string" }, "shading_device_type": { "$ref": "#/$defs/SHADINGDEVICETYPEENUM", "description": "The type of shading device" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "space_typ_state": { "$ref": "#/$defs/SPACETYPSTATEENUM", "description": "Customary or normal state of the space" }, "specific": { "$ref": "#/$defs/SPECIFICENUM", "description": "The building specifications. If design is chosen, indicate phase: conceptual, schematic, design development, construction documents" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "specific_humidity": { "description": "The mass of water vapour in a unit mass of moist air, usually expressed as grams of vapour per kilogram of air, or, in air conditioning, as grains per pound", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "substructure_type": { "description": "The substructure or under building is that largely hidden section of the building which is built off the foundations to the ground floor level", "items": { "$ref": "#/$defs/SUBSTRUCTURETYPEENUM" }, "type": "array" }, "surf_air_cont": { "description": "Contaminant identified on surface", "items": { "$ref": "#/$defs/SURFAIRCONTENUM" }, "type": "array" }, "surf_humidity": { "description": "Surfaces: water activity as a function of air and material moisture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "surf_moisture": { "description": "Water held on a surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "surf_moisture_ph": { "description": "ph measurement of surface", "type": "number" }, "surf_temp": { "description": "Temperature of the surface at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp_out": { "description": "The recorded temperature value at sampling time outside", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "train_line": { "$ref": "#/$defs/TRAINLINEENUM", "description": "The subway line name" }, "train_stat_loc": { "$ref": "#/$defs/TRAINSTATLOCENUM", "description": "The train station collection location" }, "train_stop_loc": { "$ref": "#/$defs/TRAINSTOPLOCENUM", "description": "The train stop collection location" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "typ_occup_density": { "description": "Customary or normal density of occupants", "type": "number" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "vis_media": { "description": "The building visual media", "type": "string" }, "wall_area": { "description": "The total area of the sampled room's walls", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_const_type": { "$ref": "#/$defs/WALLCONSTTYPEENUM", "description": "The building class of the wall defined by the composition of the building elements and fire-resistance rating" }, "wall_finish_mat": { "$ref": "#/$defs/WALLFINISHMATENUM", "description": "The material utilized to finish the outer most layer of the wall" }, "wall_height": { "description": "The average height of the walls in the sampled room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the wall within the room" }, "wall_surf_treatment": { "$ref": "#/$defs/WALLSURFTREATMENTENUM", "description": "The surface treatment of interior wall" }, "wall_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a wall surface" }, "wall_thermal_mass": { "description": "The ability of the wall to provide inertia against temperature fluctuations. Generally this means concrete or concrete block that is either exposed or covered only with paint", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a wall" }, "water_feat_size": { "description": "The size of the water feature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_feat_type": { "$ref": "#/$defs/WATERFEATTYPEENUM", "description": "The type of water feature present within the building being sampled" }, "weekday": { "$ref": "#/$defs/WEEKDAYENUM", "description": "The day of the week when sampling occurred" }, "window_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the window at the time of sampling" }, "window_cover": { "$ref": "#/$defs/WINDOWCOVERENUM", "description": "The type of window covering" }, "window_horiz_pos": { "$ref": "#/$defs/WINDOWHORIZPOSENUM", "description": "The horizontal position of the window on the wall" }, "window_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the window within the room" }, "window_mat": { "$ref": "#/$defs/WINDOWMATENUM", "description": "The type of material used to finish a window" }, "window_open_freq": { "description": "The number of times windows are opened per week", "type": "integer" }, "window_size": { "description": "The window's length and width", "type": "string" }, "window_status": { "$ref": "#/$defs/WINDOWSTATUSENUM", "description": "Defines whether the windows were open or closed during environmental testing" }, "window_type": { "$ref": "#/$defs/WINDOWTYPEENUM", "description": "The type of windows" }, "window_vert_pos": { "$ref": "#/$defs/WINDOWVERTPOSENUM", "description": "The vertical position of the window on the wall" }, "window_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the window" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "surf_material", "rel_air_humidity", "abs_air_humidity", "air_temp", "build_occup_type", "carb_dioxide", "ventilation_type", "organism_count", 
"indoor_space", "indoor_surf", "filter_type", "heat_cool_type", "building_setting", "light_type", "space_typ_state", "typ_occup_density", "occup_samp", "occup_density_samp" ], "title": "MigsEuBuiltEnvironment", "type": "object" }, "MigsEuFoodAnimalAndAnimalFeed": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the FoodAnimalAndAnimalFeed Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "animal_am": { "description": "The name(s) (generic or brand) of the antimicrobial(s) given to the food animal within the last 30 days", "type": "string" }, "animal_am_dur": { "description": "The duration of time (days) that the antimicrobial was administered to the food animal", "type": "string" }, "animal_am_freq": { "description": "The frequency per day that the antimicrobial was administered to the food animal", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "animal_am_route": { "description": "The route by which the antimicrobial is administered into the body of the food animal", "type": "string" }, "animal_am_use": { "description": "The prescribed intended use of or the condition treated by the antimicrobial given to the food animal by any route of administration", "type": "string" }, "animal_body_cond": { "$ref": "#/$defs/ANIMALBODYCONDENUM", "description": "Body condition scoring is a production management tool used to evaluate overall health and nutritional needs of a food animal. Because there are different scoring systems, this field is restricted to three categories" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal.
Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes |Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. 
This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_sex": { "$ref": "#/$defs/ANIMALSEXENUM", "description": "The sex and reproductive status of the food animal" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e.
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, 
active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). 
Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g.
air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. 
See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. 
This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. 
This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). 
Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality; this date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_source_age": { "description": "The age of the food source host organism. Depending on the type of host organism, age may be more appropriate to report in days, weeks, or years", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. 
The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. 
Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. 
Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "food_origin", "food_prod", "food_product_type", "IFSAC_category", "intended_consumer", "samp_purpose" ], "title": "MigsEuFoodAnimalAndAnimalFeed", "type": "object" }, "MigsEuFoodFarmEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the FoodFarmEnvironment Extension", "properties": { "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. 
This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "air_flow_impede": { "description": "Presence of objects in the area that would influence or impede air flow through the air filter", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes |Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_intrusion": { "description": "Identification of animals intruding on the sample or sample site including invertebrates (such as pests or pollinators) and vertebrates (such as wildlife or domesticated animals). 
This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "date_extr_weath": { "description": "Date of unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "format": "date-time", "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extr_weather_event": { "description": "Unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "$ref": "#/$defs/EXTRWEATHEREVENTENUM" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_san_freq": { "description": "The number of times farm equipment is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually", "type": "string" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. 
This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fertilizer_date": { "description": "Date of administration of soil amendment or fertilizer. Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "format": "date-time", "type": "string" }, "food_clean_proc": { "$ref": "#/$defs/FOODCLEANPROCENUM", "description": "The process of cleaning food to separate other environmental materials from the food source. Multiple terms can be separated by pipes" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. 
This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality; unless used on infant formula, this date is not a reflection of safety, and it is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. 
This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_medium": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). The name of the medium used to grow the microorganism", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 
9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. 
The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. 
soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_part_maturity": { "description": "A description of the stage of development of a plant or plant part based on maturity or ripeness. This field accepts terms listed under degree of plant maturity (http://purl.obolibrary.org/obo/FOODON_03530050)", "type": "string" }, "plant_reprod_crop": { "description": "Plant reproductive part used in the field during planting to start the crop", "items": { "$ref": "#/$defs/PLANTREPRODCROPENUM" }, "type": "array" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts termed listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). 
For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free. Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. 
under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. 
This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. 
This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of soil at time of sampling.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_pH": { "description": "The pH of soil at time of sampling.", "type": "number" }, "soil_porosity": { "description": "Porosity of soil or deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_class": { "$ref": "#/$defs/SOILTEXTURECLASSENUM", "description": "One of the 12 soil texture classes used to describe soil texture based on the relative proportion of different grain sizes of mineral particles [sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um)] in a soil" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. 
This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. 
Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "water_frequency": { "description": "Number of water delivery events within a given period of time", "type": "string" }, "water_pH": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_adjac": { "description": "Description of the environmental features that are adjacent to the farm water source. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "water_source_shared": { "description": "Other users sharing access to the same water source. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "biotic_regm", "chem_administration", "food_product_type", "samp_type", "IFSAC_category" ], "title": "MigsEuFoodFarmEnvironment", "type": "object" }, "MigsEuFoodFoodProductionFacility": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the FoodFoodProductionFacility Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). 
Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "area_samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide_used": { "description": "Substance intended for preventing, neutralizing, destroying, repelling, or mitigating the effects of any pest or microorganism; that inhibits the growth, reproduction, and activity of organisms, including fungal cells; decreases the number of fungi or pests present; deters microbial growth and degradation of other ingredients in the formulation. Indicate the biocide used on the location where the sample was taken. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. 
checkm, anvi'o, busco", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). 
Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. 
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_monitoring_zone": { "description": "An environmental monitoring zone is a formal designation as part of an environmental monitoring program, in which areas of a food production facility are categorized, commonly as zones 1-4, based on likelihood or risk of foodborne pathogen contamination. This field accepts terms listed under food production environmental monitoring zone (http://purl.obolibrary.org/obo/ENVO). Please add a term to indicate the environmental monitoring zone the sample was taken from", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. 
Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "facility_type": { "description": "Establishment details about the type of facility where the sample was taken. This is independent of the specific product(s) within the facility", "items": { "$ref": "#/$defs/FACILITYTYPEENUM" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. 
See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. 
http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. 
This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. 
This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, free-range, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. 
This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON:03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. 
The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. 
Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "hygienic_area": { "description": "The subdivision of areas within a food production facility according to hygienic requirements. This field accepts terms listed under hygienic food production area (http://purl.obolibrary.org/obo/ENVO). Please add a term that most accurately indicates the hygienic area your sample was taken from according to the definitions provided", "type": "string" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. 
This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. 
The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": 
"string" }, "type": "array" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). 
For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, free-range, industrial, hormone-free, antibiotic free, cage free. Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_loc_condition": { "$ref": "#/$defs/SAMPLOCCONDITIONENUM", "description": "The condition of the sample location at the time of sampling" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. 
Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. 
If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_surf_moisture": { "description": "Degree of water held on a sampled surface. If present, user can state the degree of water held on surface (intermittent moisture, submerged). If no surface moisture is present indicate not present", "items": { "$ref": "#/$defs/SAMPSURFMOISTUREENUM" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. 
Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. 
Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spec_intended_cons": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "ster_meth_samp_room": { "description": "The method used to sterilize the sampling room. This field accepts terms listed under electromagnetic radiation (http://purl.obolibrary.org/obo/ENVO_01001026). If the proper descriptor is not listed, please use text to describe the sampling room sterilization method. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "samp_source_mat_cat", "samp_type", "samp_stor_media", "samp_stor_device", "food_product_type", "IFSAC_category", "food_product_qual", "food_contact_surf" ], "title": "MigsEuFoodFoodProductionFacility", "type": "object" }, "MigsEuFoodHumanFoods": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the FoodHumanFoods Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products). 
An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling >= 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). 
We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_chem_add": { "description": "Any chemicals that are added to the fermentation process to achieve the desired final product", "items": { "type": "string" }, "type": "array" }, "ferm_chem_add_perc": { "description": "The amount of chemical added to the fermentation process", "items": { "type": "number" }, "type": "array" }, "ferm_headspace_oxy": { "description": "The amount of headspace oxygen in a fermentation vessel", "type": "number" }, "ferm_medium": { "description": "The growth medium used for the fermented food fermentation process, which supplies the required nutrients. Usually this includes a carbon and nitrogen source, water, micronutrients and chemical additives", "type": "string" }, "ferm_pH": { "description": "The pH of the fermented food fermentation process", "type": "number" }, "ferm_rel_humidity": { "description": "The relative humidity of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_temp": { "description": "The temperature of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_time": { "description": "The time duration of the fermented food fermentation process", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "ferm_vessel": { "description": "The type of vessel used for containment of the fermentation", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. 
This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that are not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. 
This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). 
Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. 
This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. Unless used on infant formula, this date is not a reflection of safety; it is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. 
The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. 
specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "microb_start": { "description": "Any type of microorganisms used in food production. This field accepts terms listed under live organisms for food production (http://purl.obolibrary.org/obo/FOODON_0344453)", "type": "string" }, "microb_start_count": { "description": "Total cell count of starter culture per gram, volume or area of sample and the method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example : total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "microb_start_inoc": { "description": "The amount of starter culture used to inoculate a new batch", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "microb_start_prep": { "description": "Information about the protocol or method used to prepare the starter inoculum", "type": "string" }, "microb_start_source": { "description": "The source from which the microbial starter culture was sourced. If commercially supplied, list supplier", "type": "string" }, "microb_start_taxID": { "description": "Please include Genus species and strain ID, if known of microorganisms used in food production. 
For complex communities, pipes can be used to separate two or more microbes", "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material", "items": { "type": "string" }, "type": "array" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). 
For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. 
This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. 
Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. 
Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "food_product_type", "IFSAC_category" ], "title": "MigsEuFoodHumanFoods", "type": "object" }, "MigsEuHostAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the HostAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "blood_press_diast": { "description": "Resting diastolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "blood_press_syst": { "description": "Resting systolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . 
Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. 
The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of 
replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). 
For terms, please select terms listed under class ploidy (PATO:0001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsEuHostAssociated", "type": "object" }, "MigsEuHumanAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the HumanAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amniotic_fluid_color": { "description": "Specification of the color of the amniotic fluid sample", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. 
For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "blood_blood_disord": { "description": "History of blood disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, hematopoietic system disease (https://disease-ontology.org/?id=DOID:74)", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diet_last_six_month": { "description": "Specification of major diet changes in the last six months, if yes the change should be specified", "type": "string" }, "drug_usage": { "description": "Any drug used by subject and the frequency of usage; can include multiple drugs used", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "foetal_health_stat": { "description": "Specification of foetal health status, should also include abortion", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gestation_state": { "description": "Specification of the gestation state", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_hiv_stat": { "description": "HIV status of subject, if yes HAART initiation status should also be indicated as [YES or NO]", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "kidney_disord": { "description": "History of kidney disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, kidney disease (https://disease-ontology.org/?id=DOID:557)", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "maternal_health_stat": { "description": "Specification of the maternal health status", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was 
collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_throat_disord": { "description": "History of nose-throat disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850), upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pet_farm_animal": { "description": "Specification of presence of pets or farm animals in the environment of subject, if yes the animals should be specified; can include multiple animals present", "items": { "type": "string" }, "type": "array" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). 
For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "pulmonary_disord": { "description": "History of pulmonary disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850)", "items": { "type": "string" }, "type": "array" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. 
This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "smoker": { "description": "Specification of smoking status", "type": "boolean" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "study_complt_stat": { "description": "Specification of study completion status, if no the reason should be specified", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "travel_out_six_month": { "description": "Specification of the countries travelled in the last six months; can include multiple travels", "items": { "type": "string" }, "type": "array" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" }, "twin_sibling": { "description": "Specification of twin sibling presence", "type": "boolean" }, "urine_collect_meth": { "$ref": "#/$defs/URINECOLLECTMETHENUM", "description": "Specification of urine collection method" }, "urogenit_tract_disor": { "description": "History of urogenital tract disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" }, "weight_loss_3_month": { "description": "Specification of weight loss in the last three months, if yes should be further specified to include amount of weight loss", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsEuHumanAssociated", "type": "object" }, "MigsEuHumanGut": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the HumanGut Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. 
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gastrointest_disord": { "description": "History of gastrointestinal tract disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, gastrointestinal system disease (https://disease-ontology.org/?id=DOID:77)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "liver_disord": { "description": "History of liver disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, liver disease (https://disease-ontology.org/?id=DOID:409)", "items": { "type": "string" }, "type": "array" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). 
For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degrees Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "special_diet": { "description": "Specification of special diet; can include multiple special diets", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsEuHumanGut", "type": "object" }, "MigsEuHumanOral": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the HumanOral Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. 
High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. 
Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_mouth_teeth_throat_disord": { "description": "History of nose/mouth/teeth/throat disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, nose disease (https://disease-ontology.org/?id=DOID:2825), mouth disease (https://disease-ontology.org/?id=DOID:403), tooth disease (https://disease-ontology.org/?id=DOID:1091), or upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated genes and regions of the genomes (and perhaps for difficulties in assembly). For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_last_toothbrush": { "description": "Specification of the time since last toothbrushing", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsEuHumanOral", "type": "object" }, "MigsEuHumanSkin": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the HumanSkin Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. 
High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. 
Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dermatology_disord": { "description": "History of dermatology disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, skin disease (https://disease-ontology.org/?id=DOID:37)", "items": { "type": "string" }, "type": "array" }, "dominant_hand": { "$ref": "#/$defs/DOMINANTHANDENUM", "description": "Dominant hand of the subject" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). 
Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, 
electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated genes and regions of the genomes (and perhaps for difficulties in assembly). For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_since_last_wash": { "description": "Specification of the time since last wash", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsEuHumanSkin", "type": "object" }, "MigsEuHumanVaginal": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the HumanVaginal Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. 
High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "birth_control": { "description": "Specification of birth control medication used", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "douche": { "description": "Date of most recent douche", "format": "date-time", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. 
We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gynecologic_disord": { "description": "History of gynecological disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, female reproductive system disease (https://disease-ontology.org/?id=DOID:229)", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. 
Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hrt": { "description": "Whether subject had hormone replacement therapy, and if yes start date", "format": "date-time", "type": "string" }, "hysterectomy": { "description": "Specification of whether hysterectomy was performed", "type": "boolean" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "menarche": { "description": "Date of most recent menstruation", "format": "date-time", "type": "string" }, "menopause": { "description": 
"Date of onset of menopause", "format": "date-time", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pregnancy": { "description": "Date due of pregnancy", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field is specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sexual_act": { "description": "Current sexual partner and frequency of sex", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "urogenit_disord": { "description": "History of urogenital disorders, can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, reproductive system disease (https://disease-ontology.org/?id=DOID:15) or urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsEuHumanVaginal", "type": "object" }, "MigsEuHydrocarbonResourcesCores": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the HydrocarbonResourcesCores Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. Mic)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. 
(Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "owc_tvdss": { "description": "Depth of the original oil water contact (OWC) zone (average) (m TVDSS)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "permeability": { "description": "Measure of the ability of a hydrocarbon resource to allow fluids to pass through it. (Additional information: https://en.wikipedia.org/wiki/Permeability_(earth_sciences))", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). 
For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_md": { "description": "In non deviated well, measured depth is equal to the true vertical depth, TVD (TVD=TVDSS plus the reference or datum it refers to). In deviated wells, the MD is the length of trajectory of the borehole measured from the same reference or datum. Common datums used are ground level (GL), drilling rig floor (DF), rotary table (RT), kelly bushing (KB) and mean sea level (MSL). If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_tvdss": { "description": "Depth of the sample i.e. the vertical distance between the sea level and the sampled position in the subsurface. Depth can be reported as an interval for subsurface samples e.g. 1325.75-1362.25 m", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. 
For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "sr_dep_env": { "$ref": "#/$defs/SRDEPENVENUM", "description": "Source rock depositional environment (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of source rock (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_kerog_type": { "$ref": "#/$defs/SRKEROGTYPEENUM", "description": "Origin of kerogen. Type I: Algal (aquatic), Type II: planktonic and soft plant material (aquatic or terrestrial), Type III: terrestrial woody/ fibrous plant material (terrestrial), Type IV: oxidized recycled woody debris (terrestrial) (additional information: https://en.wikipedia.org/wiki/Kerogen). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_lithology": { "$ref": "#/$defs/SRLITHOLOGYENUM", "description": "Lithology of source rock (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "hcr", "hc_produced", "basin", "hcr_temp", "sulfate_fw", "vfa_fw", "samp_type", "api" ], "title": "MigsEuHydrocarbonResourcesCores", "type": "object" }, "MigsEuHydrocarbonResourcesFluidsSwabs": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the HydrocarbonResourcesFluidsSwabs Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "add_recov_method": { "description": "Additional (i.e. Secondary, tertiary, etc.) recovery methods deployed for increase of hydrocarbon recovery from resource and start date for each one of them. If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. 
It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide": { "description": "List of biocides (commercial name of product and supplier) and date of administration", "type": "string" }, "biocide_admin_method": { "description": "Method of biocide administration (dose, frequency, duration, time elapsed between last biociding and sampling) (e.g. 150 mg/l; weekly; 4 hr; 3 days)", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_treat_method": { "description": "Method of chemical administration(dose, frequency, duration, time elapsed between administration and sampling) (e.g. 
50 mg/l; twice a week; 1 hr; 0 days)", "type": "string" }, "chem_treatment": { "description": "List of chemical compounds administered upstream the sampling location where sampling occurred (e.g. Glycols, H2S scavenger, corrosion and scale inhibitors, demulsifiers, and other production chemicals etc.). The commercial name of the product and name of the supplier should be provided. The date of administration should also be included", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. Mic)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen s local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. 
The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "iw_bt_date_well": { "description": "Injection water breakthrough date per well following a secondary and/or tertiary recovery", "format": "date-time", "type": "string" }, "iwf": { "description": "Proportion of the produced fluids derived from injected water at the time of sampling. (e.g. 87%)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. 
(Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_rate": { "description": "Oil and/or gas production rates per well (e.g. 524 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_start_date": { "description": "Date of field's first production", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_point": { "$ref": "#/$defs/SAMPCOLLECTPOINTENUM", "description": "Sampling point on the asset where the sample was collected (e.g. Wellhead, storage tank, separator, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_loc_corr_rate": { "description": "Metal corrosion rate is the speed of metal deterioration due to environmental conditions. As environmental conditions change corrosion rates change accordingly. Therefore, long term corrosion rates are generally more informative than short term rates and for that reason they are preferred during reporting. In the case of suspected MIC, corrosion rate measurements at the time of sampling might provide insights into the involvement of certain microbial community members in MIC as well as potential microbial interplays", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. 
INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_preserv": { "description": "Preservative added to the sample (e.g. Rnalater, alcohol, formaldehyde, etc.). Where appropriate include volume added (e.g. Rnalater; 2 ml)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. 
Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. 
However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. 
Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "water_cut": { "description": "Current amount of water (%) in a produced fluid stream; or the average of the combined streams", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_prod_rate": { "description": "Water production rates per well (e.g. 987 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "hcr", "hc_produced", "basin", "water_cut", "iwf", "add_recov_method", "samp_type", "samp_collect_point", "sulfate", "sulfide", "nitrate", "api" ], "title": "MigsEuHydrocarbonResourcesFluidsSwabs", "type": "object" }, "MigsEuMicrobialMatBiofilm": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the MicrobialMatBiofilm Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. 
High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated genes and regions of the genomes (and perhaps for difficulties in assembly). For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group.
For eukaryotes: sexual/asexual", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degrees Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsEuMicrobialMatBiofilm", "type": "object" }, "MigsEuMiscellaneousNaturalOrArtificialEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the MiscellaneousNaturalOrArtificialEnvironment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. 
Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428.
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner.
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used 
as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO).
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsEuMiscellaneousNaturalOrArtificialEnvironment", "type": "object" }, "MigsEuPlantAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the PlantAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp_regm": { "description": "Information about treatment involving an exposure to varying temperatures; should include the temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include different temperature regimens", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "antibiotic_regm": { "description": "Information about treatment involving antibiotic administration; should include the name of antibiotic, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple antibiotic regimens", "items": { "type": "string" }, "type": "array" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_mutagen": { "description": "Treatment involving use of mutagens; should include the name of mutagen, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mutagen regimens", "items": { "type": "string" }, "type": "array" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cult_root_med": { "description": "Name or reference for the hydroponic or in vitro culture rooting medium; can be the name of a commonly used medium or reference to a specific medium, e.g. Murashige and Skoog medium. 
If the medium has not been formally published, use the rooting medium descriptors", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g.
UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_regm": { "description": "Information about treatment involving the use of fertilizers; should include the name of fertilizer, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fertilizer regimens", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravity": { "description": "Information about treatment involving use of gravity factor to study various types of responses in presence, absence or modified levels of gravity; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple treatments", "items": { "type": "string" }, "type": "array" }, "growth_facil": { "description": "Type of facility where the sampled plant was grown; controlled vocabulary: growth chamber, open top chamber, glasshouse, experimental garden, field. Alternatively use Crop Ontology (CO) terms, see http://www.cropontology.org/ontology/CO_715/Crop%20Research", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_hormone_regm": { "description": "Information about treatment involving use of growth hormones; should include the name of growth hormone, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple growth hormone regimens", "items": { "type": "string" }, "type": "array" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens",
"items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_wet_mass": { "description": "Measurement of wet mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity_regm": { "description": "Information about treatment involving an exposure to varying degrees of humidity; should include amount of humidity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_regm": { "description": "Information about treatment(s) involving exposure to light, including both light intensity and quality", "type": "string" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "mineral_nutr_regm": { "description": "Information about treatment involving the use of mineral supplements; should include the name of mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of 
plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of pH of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. 
Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_sex": { "$ref": "#/$defs/PLANTSEXENUM", "description": "Sex of the reproductive parts on the whole plant, e.g. pistillate, staminate, monoecious, hermaphrodite" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated genes and regions of the genomes (and perhaps for difficulties in assembly). For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. 
For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "radiation_regm": { "description": "Information about treatment involving exposure of plant or a plant part to a particular radiation regimen; should include the radiation type, amount or intensity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple radiation regimens", "items": { "type": "string" }, "type": "array" }, "rainfall_regm": { "description": "Information about treatment involving an exposure to a given amount of rainfall, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. 
sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "root_med_regl": { "description": "Growth regulators in the culture rooting medium such as cytokinins, auxins, gybberellins, abscisic acid; e.g. 0.5 mg/L NAA", "type": "string" }, "root_med_solid": { "description": "Specification of the solidifying agent in the culture rooting medium; e.g. agar", "type": "string" }, "root_med_suppl": { "description": "Organic supplements of the culture rooting medium, such as vitamins, amino acids, organic acids, antibiotics activated charcoal; e.g. nicotinic acid (0.5 mg/L)", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. 
It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degrees Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tiss_cult_growth_med": { "description": "Description of plant tissue culture growth media used", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsEuPlantAssociated", "type": "object" }, "MigsEuSediment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the Sediment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428.
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner.
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "particle_class": { "description": "Particles are classified, based on their size, into six general categories: clay, silt, sand, gravel, cobbles, and boulders; should include amount of particle preceded by the name of the particle type; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity.
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sediment_type": { "$ref": "#/$defs/SEDIMENTTYPEENUM", "description": "Information about the sediment type based on major constituents" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. 
ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsEuSediment", "type": "object" }, "MigsEuSoil": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the Soil Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "al_sat": { "description": "Aluminum saturation (esp. For tropical soils)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "al_sat_meth": { "description": "Reference or method used in determining Al saturation", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. 
The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals": { "description": "Heavy metals present in the sequenced sample and their concentrations. For multiple heavy metals and concentrations, add multiple copies of this field", "items": { "type": "string" }, "type": "array" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "link_addit_analys": { "description": "Link to additional analysis results performed on the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", 
"type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "water_cont_soil_meth": { "description": "Reference or method used in determining the water content of soil", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "elev", "env_broad_scale" ], "title": "MigsEuSoil", "type": "object" }, "MigsEuSymbiontAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the SymbiontAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft: Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft: Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft: Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "association_duration": { "description": "Time spent in host of the symbiotic organism at the time of sampling; relevant scale depends on symbiotic organism and study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_cellular_loc": { "$ref": "#/$defs/HOSTCELLULARLOCENUM", "description": "The localization of the symbiotic host organism within the host from which it was sampled: e.g. 
intracellular if the symbiotic host organism is localized within the cells or extracellular if the symbiotic host organism is localized outside of cells" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_dependence": { "$ref": "#/$defs/HOSTDEPENDENCEENUM", "description": "Type of host dependence for the symbiotic host organism to its host" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_infra_spec_name": { "description": "Taxonomic information about the host below subspecies level", "type": "string" }, "host_infra_spec_rank": { "description": "Taxonomic rank information about the host below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_number": { "description": "Number of symbiotic host individuals pooled at the time of collection", "type": "string" }, "host_of_host_coinf": { "description": "The taxonomic name of any coinfecting organism observed in a symbiotic relationship with the host of the sampled host organism. e.g. where a sample collected from a host trematode species (A) which was collected from a host_of_host fish (B) that was also infected with a nematode (C), the value here would be (C) the nematode {species name} or {common name}. Multiple co-infecting species may be added in a comma-separated list. For listing symbiotic organisms associated with the host (A) use the term Observed host symbiont", "type": "string" }, "host_of_host_disease": { "description": "List of diseases with which the host of the symbiotic host organism has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_loc": { "description": "For a symbiotic host organism the local anatomical environment within its host may have causal influences. 
Report the anatomical entity(s) which are in the direct environment of the symbiotic host organism being sampled and which you believe have significant causal influences on your sample or specimen. For example, if the symbiotic host organism being sampled is an intestinal worm, its local environmental context will be the term for intestine from UBERON (http://uberon.github.io/)", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_med": { "description": "Report the environmental material(s) immediately surrounding the symbiotic host organism at the time of sampling. This usually will be a tissue or substance type from the host, but may be another material if the symbiont is external to the host. We recommend using classes from the UBERON ontology, but subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483) may also be used. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
intestines, heart)", "type": "string" }, "host_of_host_fam_rel": { "description": "Familial relationship of the host of the symbiotic host organisms to other hosts of symbiotic host organism in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_of_host_geno": { "description": "Observed genotype of the host of the symbiotic host organism", "type": "string" }, "host_of_host_gravid": { "description": "Whether or not the host of the symbiotic host organism is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_of_host_infname": { "description": "Taxonomic name information of the host of the symbiotic host organism below subspecies level", "type": "string" }, "host_of_host_infrank": { "description": "Taxonomic rank information about the host of the symbiotic host organism below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_of_host_name": { "description": "Common name of the host of the symbiotic host organism", "type": "string" }, "host_of_host_pheno": { "description": "Phenotype of the host of the symbiotic host organism. For phenotypic quality ontology (PATO) terms, see http://purl.bioontology.org/ontology/pato", "type": "string" }, "host_of_host_sub_id": { "description": "A unique identifier by which each host of the symbiotic host organism subject can be referred to, de-identified, e.g. #H14", "type": "string" }, "host_of_host_taxid": { "description": "NCBI taxon id of the host of the symbiotic host organism", "type": "string" }, "host_of_host_totmass": { "description": "Total mass of the host of the symbiotic host organism at collection, the unit depends on the host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_specificity": { "$ref": "#/$defs/HOSTSPECIFICITYENUM", "description": "Level of specificity of symbiont-host interaction: e.g. generalist (symbiont able to establish associations with distantly related hosts) or species-specific" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "mode_transmission": { "$ref": "#/$defs/MODETRANSMISSIONENUM", "description": "The process through which the symbiotic host organism entered the host from which it was sampled" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that 
describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated genes and regions of the genomes (and perhaps for difficulties in assembly). For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "route_transmission": { "$ref": "#/$defs/ROUTETRANSMISSIONENUM", "description": "Description of path taken by the symbiotic host organism being sampled in order to establish a symbiotic relationship with the host (with which it was observed at the time of sampling) via a mode of transmission (specified in mode_transmission)" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_sol": { "description": "Solution within which sample was stored, if any", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sym_life_cycle_type": { "$ref": "#/$defs/SYMLIFECYCLETYPEENUM", "description": "Type of life cycle of the symbiotic host species (the thing being sampled). Simple life cycles occur within a single host, complex ones within multiple different hosts over the course of their normal life cycle" }, "symbiont_host_role": { "$ref": "#/$defs/SYMBIONTHOSTROLEENUM", "description": "Role of the host in the life cycle of the symbiotic organism" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" }, "type_of_symbiosis": { "$ref": "#/$defs/TYPEOFSYMBIOSISENUM", "description": "Type of biological interaction established between the symbiotic host organism being sampled and its respective host" }, "urobiom_sex": { "$ref": "#/$defs/UROBIOMSEXENUM", "description": "Physical sex of the host" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "host_dependence", "sym_life_cycle_type", "host_life_stage" ], "title": "MigsEuSymbiontAssociated", "type": "object" }, "MigsEuWastewaterSludge": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the WastewaterSludge Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biochem_oxygen_dem": { "description": "Amount of dissolved oxygen needed by aerobic biological organisms in a body of water to break down organic material present in a given water sample at certain temperature over a specific time period", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_oxygen_dem": { "description": "A measure of the capacity of water to consume oxygen during the decomposition of organic matter and the oxidation of inorganic chemicals such as ammonia and nitrite", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "efficiency_percent": { "description": "Percentage of volatile solids removed from the anaerobic digestor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "emulsions": { "description": "Amount or concentration of substances such as paints, adhesives, mayonnaise, hair colorants, emulsified oils, etc.; can include multiple emulsion types", "items": { "type": "string" }, "type": "array" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gaseous_substances": { "description": "Amount or concentration of substances such as hydrogen sulfide, carbon dioxide, methane, etc.; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "indust_eff_percent": { "description": "Percentage of industrial effluents received by wastewater treatment plant", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "inorg_particles": { "description": "Concentration of particles such as sand, grit, metal particles, ceramics, etc.; can include multiple particles", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": 
"^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_particles": { "description": "Concentration of particles such as faeces, hairs, food, vomit, paper fibers, plant material, humus, etc", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated genes and regions of the genomes (and perhaps for difficulties in assembly). 
For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pre_treatment": { "description": "The process of pre-treatment removes materials that can be easily collected from the raw wastewater", "type": "string" }, "primary_treatment": { "description": "The process to produce both a generally homogeneous liquid capable of being treated biologically and a sludge that can be separately treated or processed", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "reactor_type": { "description": "Anaerobic digesters can be designed and engineered to operate using a number of different process configurations, as batch or continuous, mesophilic, high solid or low solid, and single stage or multistage", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "secondary_treatment": { "description": "The process for substantially degrading the biological content of the sewage", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sewage_type": { "description": "Type of wastewater treatment plant as municipal or industrial", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sludge_retent_time": { "description": "The time activated sludge remains in reactor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_inorg_mat": { "description": "Concentration of substances such as ammonia, road-salt, sea-salt, cyanide, hydrogen sulfide, thiocyanates, thiosulfates, etc", "items": { "type": "string" }, "type": "array" }, "soluble_org_mat": { "description": "Concentration of substances such as urea, fruit sugars, soluble proteins, drugs, pharmaceuticals, etc", "items": { "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tertiary_treatment": { "description": "The process providing a final treatment stage to raise the effluent quality before it is discharged to the receiving environment", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" }, "wastewater_type": { "description": "The origin of wastewater such as human waste, rainfall, storm drains, etc", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsEuWastewaterSludge", "type": "object" }, "MigsEuWater": { "additionalProperties": false, "description": "MIxS data that complies with the MigsEu checklist and the Water Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. 
High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "atmospheric_data": { "description": "Measurement of atmospheric data; can include multiple data", "items": { "type": "string" }, "type": "array" }, "bac_prod": { "description": "Bacterial production in the water column measured by isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bac_resp": { "description": "Measurement of bacterial respiration in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. 
Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_nitro": { "description": "Concentration of dissolved inorganic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "down_par": { "description": "Visible waveband radiance and irradiance measurements in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). 
We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fluor": { "description": "Raw or converted fluorescence of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_intensity": { "description": "Measurement of light intensity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "part_org_nitro": { "description": "Concentration of particulate organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "photon_flux": { "description": "Measurement of photon flux", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ploidy": { "description": "The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "primary_prod": { "description": "Measurement of primary production, generally measured as isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. 
More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_react_phosp": { "description": "Concentration of soluble reactive phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_part_matter": { "description": "Concentration of suspended particulate matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_diss_nitro": { "description": "Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_inorg_nitro": { "description": "Total inorganic nitrogen content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_part_carb": { "description": "Total particulate carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "number_contig", "assembly_qual", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsEuWater", "type": "object" }, "MigsOrgAgriculture": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the Agriculture Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. 
parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. 
Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. 
checkm, anvi'o, busco", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. 
The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can including type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). 
Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON:03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). 
If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; information about treatment involving use of growth hormones; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer 
or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area 
of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of pH of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "photosynt_activ": { "description": "Measurement of photosynthetic activity (i.e. leaf gas exchange / chlorophyll fluorescence emissions / reflectance / transpiration). Please also include the method term detailing the method of activity measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "photosynt_activ_meth": { "description": "Reference or method used in measurement of photosynthetic activity", "items": { "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. 
under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N, P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. 
It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. Maybe be a single taxon or mixed taxa sample. 
Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. 
Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of some soil.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. 
This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_pH": { "description": "pH of some soil.", "type": "number" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. 
identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "water_pH": { "description": "The pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_shared": { "description": "Other users sharing access to the same water source. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "nucl_acid_amp", "assembly_name", "temp", "nucl_acid_ext", "isol_growth_condt", "project_name", "adapters", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "chem_administration", "food_source", "specific_host", "pathogenicity", "samp_store_dur", "samp_store_loc", "host_age", "host_common_name", "host_genotype", 
"host_height", "host_length", "host_life_stage", "host_phenotype", "host_taxid", "host_tot_mass", "host_spec_range", "pcr_primers", "target_gene", "target_subfragment", "mid", "pcr_cond", "chimera_check", "soil_type", "soil_type_meth", "store_cond", "microbial_biomass", "micro_biomass_meth", "sieving", "pool_dna_extracts" ], "title": "MigsOrgAgriculture", "type": "object" }, "MigsOrgAir": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the Air Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "barometric_press": { "description": "Force per unit area exerted against a surface by the weight of air above that surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_monoxide": { "description": "Carbon monoxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "oxygen": { "description": "Oxygen (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pollutants": { "description": "Pollutant types and, amount or concentrations measured at the time of sampling; can report multiple pollutants by entering numeric values preceded by name of pollutant", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid 
sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "volatile_org_comp": { "description": "Concentration of carbon-based chemicals that easily evaporate at room temperature; can report multiple volatile organic compounds by entering numeric values preceded by name of compound", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "alt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsOrgAir", "type": "object" }, "MigsOrgBuiltEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the BuiltEnvironment Extension", "properties": { "abs_air_humidity": { "description": "Actual mass of water vapor - mh20 - present in the air water vapor mixture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "address": { "description": "The street name and building number where the sampling occurred", "type": "string" }, "adj_room": { "description": "List of rooms (room number, room name) immediately adjacent to the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "aero_struc": { "$ref": "#/$defs/AEROSTRUCENUM", "description": "Aerospace structures typically consist of thin plates with stiffeners for the external surfaces, bulkheads and frames to support the shape and fasteners such as welds, rivets, screws and bolts to hold the components together" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amount_light": { "description": "The unit of illuminance and luminous emittance, measuring luminous flux per unit area", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "arch_struc": { "$ref": "#/$defs/ARCHSTRUCENUM", "description": "An architectural structure is a human-made, free-standing, immobile outdoor construction" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_dew_point": { "description": "The average of dew point measures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "avg_temp": { "description": "The average of temperatures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bathroom_count": { "description": "The number of bathrooms in the building", "type": "integer" }, "bedroom_count": { "description": "The number of bedrooms in the building", "type": "integer" }, "build_docs": { "$ref": "#/$defs/BUILDDOCSENUM", "description": "The building design, construction and operation documents" }, "build_occup_type": { "description": "The primary function for which a building or discrete part of a building is intended to be used", "items": { "$ref": "#/$defs/BUILDOCCUPTYPEENUM" }, "type": "array" }, "building_setting": { "$ref": "#/$defs/BUILDINGSETTINGENUM", "description": "A location (geography) where a building is set" }, "built_struc_age": { "description": "The age of the built structure since construction", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "built_struc_set": { "$ref": "#/$defs/BUILTSTRUCSETENUM", "description": "The characterization of the location of the built structure as high or low human density" }, "built_struc_type": { "description": "A physical structure that is a body or assemblage of bodies in space to form a system capable of supporting loads", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_area": { "description": "The area of the ceiling space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the ceiling at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "ceil_finish_mat": { "$ref": "#/$defs/CEILFINISHMATENUM", "description": "The type of material used to finish a ceiling" }, "ceil_struc": { "$ref": "#/$defs/CEILSTRUCENUM", "description": "The construction format of the ceiling" }, "ceil_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a ceiling surface" }, "ceil_thermal_mass": { "description": "The ability of the ceiling to provide inertia against temperature fluctuations. Generally this means concrete that is exposed. A metal deck that supports a concrete slab will act thermally as long as it is exposed to room air flow", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_type": { "$ref": "#/$defs/CEILTYPEENUM", "description": "The type of ceiling according to the ceiling's appearance or construction" }, "ceil_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the ceiling" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "cool_syst_id": { "description": "The cooling system identifier", "type": "integer" }, "date_last_rain": { "description": "The date of the last time it rained", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dew_point": { "description": "The temperature to which a given parcel of humid air must be cooled, at constant barometric pressure, for water vapor to condense into water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_comp_type": { "$ref": "#/$defs/DOORCOMPTYPEENUM", "description": "The composite type of the door" }, "door_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the door" }, "door_direct": { "$ref": "#/$defs/DOORDIRECTENUM", "description": "The direction the door opens" }, "door_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the door in the room" }, "door_mat": { "$ref": "#/$defs/DOORMATENUM", "description": "The material the door is composed of" }, "door_move": { "$ref": "#/$defs/DOORMOVEENUM", "description": "The type of movement of the door" }, "door_size": { "description": "The size of the door", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_type": { "$ref": "#/$defs/DOORTYPEENUM", "description": "The type of door material" }, "door_type_metal": { "$ref": "#/$defs/DOORTYPEMETALENUM", "description": "The type of metal door" }, "door_type_wood": { "description": "The type of wood door", "type": "string" }, "door_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a door" }, "drawings": { "$ref": "#/$defs/DRAWINGSENUM", "description": "The building's architectural drawings; if design is chosen, indicate phase-conceptual, schematic, design development, and construction documents" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elevator": { "description": "The number of elevators within the built structure", "type": "integer" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "escalator": { "description": "The number of escalators within the built structure", "type": "integer" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "exp_duct": { "description": "The amount of exposed ductwork in the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "exp_pipe": { "description": "The number of exposed pipes in the room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "ext_door": { "description": "The number of exterior doors in the built structure", "type": "integer" }, "ext_wall_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The orientation of the exterior wall" }, "ext_window_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The compass direction the exterior window of the room is facing" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "filter_type": { "description": "A device which removes solid particulates or airborne molecular contaminants", "items": { "$ref": "#/$defs/FILTERTYPEENUM" }, "type": "array" }, "fireplace_type": { "$ref": "#/$defs/FIREPLACETYPEENUM", "description": "A firebox with chimney" }, "floor_age": { "description": "The time period since installment of the carpet or flooring", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_area": { "description": "The area of the floor space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the floor at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "floor_count": { "description": "The number of floors in the building, including basements and mechanical penthouse", "type": "integer" }, "floor_finish_mat": { "description": "The floor covering type; the finished surface that is walked on", "type": "string" }, "floor_struc": { "$ref": "#/$defs/FLOORSTRUCENUM", "description": "Refers to the structural elements and subfloor upon which the finish flooring is installed" }, "floor_thermal_mass": { "description": "The ability of the floor to provide inertia against temperature fluctuations", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_water_mold": { "$ref": "#/$defs/FLOORWATERMOLDENUM", "description": "Signs of the presence of mold or mildew in a room" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. 
Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "freq_cook": { "description": "The number of times a meal is cooked per week", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "furniture": { "$ref": "#/$defs/FURNITUREENUM", "description": "The types of furniture present in the sampled room" }, "gender_restroom": { "$ref": "#/$defs/GENDERRESTROOMENUM", "description": "The gender type of the restroom" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hall_count": { "description": "The total count of hallways and corridors in the built structure", "type": "integer" }, "handidness": { "$ref": "#/$defs/HANDIDNESSENUM", "description": "The handedness of the individual sampled" }, "heat_cool_type": { "description": "Methods of conditioning or heating a room or building", "items": { "$ref": "#/$defs/HEATCOOLTYPEENUM" }, "type": "array" }, "heat_deliv_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The location of heat delivery within the room" }, "heat_sys_deliv_meth": { "$ref": "#/$defs/HEATSYSDELIVMETHENUM", "description": "The method by which the heat is delivered through the system" }, "heat_system_id": { "description": "The heating system identifier", "type": "integer" }, "height_carper_fiber": { "description": "The average carpet fiber height in the indoor environment", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "indoor_space": { "$ref": "#/$defs/INDOORSPACEENUM", "description": "A distinguishable space within a structure, the purpose for which discrete areas of a building is used" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "inside_lux": { "description": "The recorded value at sampling time (power density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "int_wall_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the wall at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "last_clean": { "description": "The last time the floor was cleaned (swept, mopped, vacuumed)", "format": "date-time", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_type": { "description": "Application of light to achieve some practical or aesthetic effect. Lighting includes the use of both artificial light sources such as lamps and light fixtures, as well as natural illumination by capturing daylight. 
Can also include absence of light", "items": { "$ref": "#/$defs/LIGHTTYPEENUM" }, "type": "array" }, "max_occup": { "description": "The maximum amount of people allowed in the indoor environment", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mech_struc": { "$ref": "#/$defs/MECHSTRUCENUM", "description": "mechanical structure: a moving structure" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "number_pets": { "description": "The number of pets residing in the sampled space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_plants": { "description": "The number of plant(s) in the sampling space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_resident": { "description": "The number of individuals currently occupying in the sampling location", "pattern": "^[1-9][0-9]* 
([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "occup_density_samp": { "description": "Average number of occupants at time of sampling per square footage", "type": "number" }, "occup_document": { "$ref": "#/$defs/OCCUPDOCUMENTENUM", "description": "The type of documentation of occupancy" }, "occup_samp": { "description": "Number of occupants present at time of sample within the given space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "quad_pos": { "$ref": "#/$defs/QUADPOSENUM", "description": "The quadrant position of the sampling room within the building" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_air_humidity": { "description": "Partial vapor and air pressure, density 
of the vapor and air, or by the actual mass of the vapor and air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "rel_humidity_out": { "description": "The recorded outside relative humidity value at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_samp_loc": { "$ref": "#/$defs/RELSAMPLOCENUM", "description": "The sampling location within the train car" }, "room_air_exch_rate": { "description": "The rate at which outside air replaces indoor air in a given space", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_architec_elem": { "description": "The unique details and component parts that, together, form the architecture of a distinguishable space within a built structure", "type": "string" }, "room_condt": { "$ref": "#/$defs/ROOMCONDTENUM", "description": "The condition of the room at the time of sampling" }, "room_connected": { "$ref": "#/$defs/ROOMCONNECTEDENUM", "description": "List of rooms connected to the sampling room by a doorway" }, "room_count": { "description": "The total count of rooms in the built structure including all room types", "type": "integer" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "room_door_dist": { "description": "Distance between doors (meters) in the hallway between the sampling room and adjacent rooms", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_door_share": { "description": "List of room(s) (room number, room name) sharing a door with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_hallway": { 
"description": "List of room(s) (room number, room name) located in the same hallway as sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_loc": { "$ref": "#/$defs/ROOMLOCENUM", "description": "The position of the room within the building" }, "room_moist_dam_hist": { "description": "The history of moisture damage or mold in the past 12 months. Number of events of moisture damage or mold observed", "type": "integer" }, "room_net_area": { "description": "The net floor area of sampling room. Net area excludes wall thicknesses", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_occup": { "description": "Count of room occupancy at time of sampling", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_samp_pos": { "$ref": "#/$defs/ROOMSAMPPOSENUM", "description": "The horizontal sampling position in the room relative to architectural elements" }, "room_type": { "description": "The main purpose or activity of the sampling room. A room is any distinguishable space within a structure", "type": "string" }, "room_vol": { "description": "Volume of sampling room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_wall_share": { "description": "List of room(s) (room number, room name) sharing a wall with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_window_count": { "description": "Number of windows in the room", "type": "integer" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_sort_meth": { "description": "Method by which samples are sorted; open face filter collecting total suspended particles, prefilter to remove particles larger than X micrometers in diameter, where common values of X would be 10 and 2.5 full size sorting in a cascade impactor", "items": { "type": "string" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_time_out": { "description": "The recent and long term history of outside sampling", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_weather": { "$ref": "#/$defs/SAMPWEATHERENUM", "description": "The weather on the sampling day" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_use": { "$ref": "#/$defs/SEASONUSEENUM", "description": "The seasons the space is occupied" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "shad_dev_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the shading device" }, "shading_device_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the shading device at the time of sampling" }, "shading_device_loc": { "$ref": "#/$defs/SHADINGDEVICELOCENUM", "description": "The location of the shading device in relation to the built structure" }, "shading_device_mat": { "description": "The material the shading device is composed of", "type": "string" }, "shading_device_type": { "$ref": "#/$defs/SHADINGDEVICETYPEENUM", "description": "The type of shading device" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "space_typ_state": { "$ref": "#/$defs/SPACETYPSTATEENUM", "description": "Customary or normal state of the space" }, "specific": { "$ref": "#/$defs/SPECIFICENUM", "description": "The building specifications. If design is chosen, indicate phase: conceptual, schematic, design development, construction documents" }, "specific_humidity": { "description": "The mass of water vapour in a unit mass of moist air, usually expressed as grams of vapour per kilogram of air, or, in air conditioning, as grains per pound", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "substructure_type": { "description": "The substructure or under building is that largely hidden section of the building which is built off the foundations to the ground floor level", "items": { "$ref": "#/$defs/SUBSTRUCTURETYPEENUM" }, "type": "array" }, "surf_air_cont": { "description": "Contaminant identified on surface", "items": { "$ref": "#/$defs/SURFAIRCONTENUM" }, "type": "array" }, "surf_humidity": { "description": "Surfaces: water activity as a function of air and material moisture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "surf_moisture": { "description": "Water held on a surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "surf_moisture_ph": { "description": "ph measurement of surface", "type": "number" }, "surf_temp": { "description": "Temperature of the surface at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp_out": { "description": "The recorded temperature value at sampling time outside", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "train_line": { "$ref": "#/$defs/TRAINLINEENUM", "description": "The subway line name" }, "train_stat_loc": { "$ref": "#/$defs/TRAINSTATLOCENUM", "description": "The train station collection location" }, "train_stop_loc": { "$ref": "#/$defs/TRAINSTOPLOCENUM", "description": "The train stop collection location" }, "typ_occup_density": { "description": "Customary or normal density of occupants", "type": "number" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "vis_media": { "description": "The building visual media", "type": "string" }, "wall_area": { "description": "The total area of the sampled room's walls", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_const_type": { "$ref": "#/$defs/WALLCONSTTYPEENUM", "description": "The building class of the wall defined by the composition of the building elements and fire-resistance rating" }, "wall_finish_mat": { "$ref": "#/$defs/WALLFINISHMATENUM", "description": "The material utilized to finish the outer most layer of the wall" }, "wall_height": { "description": "The average height of the walls in the sampled room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the wall within the room" }, "wall_surf_treatment": { "$ref": "#/$defs/WALLSURFTREATMENTENUM", "description": "The surface treatment of interior wall" }, "wall_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a wall surface" }, "wall_thermal_mass": { "description": "The ability of the wall to provide inertia against temperature fluctuations. Generally this means concrete or concrete block that is either exposed or covered only with paint", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a wall" }, "water_feat_size": { "description": "The size of the water feature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_feat_type": { "$ref": "#/$defs/WATERFEATTYPEENUM", "description": "The type of water feature present within the building being sampled" }, "weekday": { "$ref": "#/$defs/WEEKDAYENUM", "description": "The day of the week when sampling occurred" }, "window_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the window at the time of sampling" }, "window_cover": { "$ref": "#/$defs/WINDOWCOVERENUM", "description": "The type of window covering" }, "window_horiz_pos": { "$ref": "#/$defs/WINDOWHORIZPOSENUM", "description": "The horizontal position of the window on the wall" }, "window_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the window within the room" }, "window_mat": { "$ref": "#/$defs/WINDOWMATENUM", "description": "The type of material used to finish a window" }, "window_open_freq": { "description": "The number of times windows are opened per week", "type": "integer" }, "window_size": { "description": "The window's length and width", "type": "string" }, "window_status": { "$ref": "#/$defs/WINDOWSTATUSENUM", "description": "Defines whether the windows were open or closed during environmental testing" }, "window_type": { "$ref": "#/$defs/WINDOWTYPEENUM", "description": "The type of windows" }, "window_vert_pos": { "$ref": "#/$defs/WINDOWVERTPOSENUM", "description": "The vertical position of the window on the wall" }, "window_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the window" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "surf_material", "rel_air_humidity", "abs_air_humidity", "air_temp", "build_occup_type", "carb_dioxide", "ventilation_type", "organism_count", "indoor_space", "indoor_surf", "filter_type", 
"heat_cool_type", "building_setting", "light_type", "space_typ_state", "typ_occup_density", "occup_samp", "occup_density_samp" ], "title": "MigsOrgBuiltEnvironment", "type": "object" }, "MigsOrgFoodAnimalAndAnimalFeed": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the FoodAnimalAndAnimalFeed Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "animal_am": { "description": "The name(s) (generic or brand) of the antimicrobial(s) given to the food animal within the last 30 days", "type": "string" }, "animal_am_dur": { "description": "The duration of time (days) that the antimicrobial was administered to the food animal", "type": "string" }, "animal_am_freq": { "description": "The frequency per day that the antimicrobial was administered to the food animal", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "animal_am_route": { "description": "The route by which the antimicrobial is administered into the body of the food animal", "type": "string" }, "animal_am_use": { "description": "The prescribed intended use of or the condition treated by the antimicrobial given to the food animal by any route of administration", "type": "string" }, "animal_body_cond": { "$ref": "#/$defs/ANIMALBODYCONDENUM", "description": "Body condition scoring is a production management tool used to evaluate overall health and nutritional needs of a food animal. Because there are different scoring systems, this field is restricted to three categories" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. 
Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes |Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. 
This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_sex": { "$ref": "#/$defs/ANIMALSEXENUM", "description": "The sex and reproductive status of the food animal" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, 
active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). 
Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. 
air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. 
See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. 
This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. 
This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). 
Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_source_age": { "description": "The age of the food source host organism. Depending on the type of host organism, age may be more appropriate to report in days, weeks, or years", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. 
This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. 
The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, 
"organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. 
This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). 
This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "food_origin", "food_prod", "food_product_type", "IFSAC_category", "intended_consumer", "samp_purpose" ], "title": "MigsOrgFoodAnimalAndAnimalFeed", "type": "object" }, "MigsOrgFoodFarmEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the FoodFarmEnvironment Extension", "properties": { "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). 
Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "air_flow_impede": { "description": "Presence of objects in the area that would influence or impede air flow through the air filter", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. 
It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes. Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_intrusion": { "description": "Identification of animals intruding on the sample or sample site including invertebrates (such as pests or pollinators) and vertebrates (such as wildlife or domesticated animals). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. 
Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "date_extr_weath": { "description": "Date of unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "format": "date-time", "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extr_weather_event": { "description": "Unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "$ref": "#/$defs/EXTRWEATHEREVENTENUM" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_san_freq": { "description": "The number of times farm equipment is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually", "type": "string" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. 
This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fertilizer_date": { "description": "Date of administration of soil amendment or fertilizer. Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "format": "date-time", "type": "string" }, "food_clean_proc": { "$ref": "#/$defs/FOODCLEANPROCENUM", "description": "The process of cleaning food to separate other environmental materials from the food source. Multiple terms can be separated by pipes" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. 
This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. 
This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_medium": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). The name of the medium used to grow the microorganism", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. 
The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_part_maturity": { "description": "A description of the stage of development of a plant or plant part based on maturity or ripeness. 
This field accepts terms listed under degree of plant maturity (http://purl.obolibrary.org/obo/FOODON_03530050)", "type": "string" }, "plant_reprod_crop": { "description": "Plant reproductive part used in the field during planting to start the crop", "items": { "$ref": "#/$defs/PLANTREPRODCROPENUM" }, "type": "array" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, free-range, industrial, hormone-free, antibiotic free, cage free. Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. 
under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. 
This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. 
This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of soil at time of sampling.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_pH": { "description": "The pH of soil at time of sampling.", "type": "number" }, "soil_porosity": { "description": "Porosity of soil or deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_class": { "$ref": "#/$defs/SOILTEXTURECLASSENUM", "description": "One of the 12 soil texture classes used to describe soil texture based on the relative proportion of different grain sizes of mineral particles [sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um)] in a soil" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. 
This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). 
This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "water_frequency": { "description": "Number of water delivery events within a given period of time", "type": "string" }, "water_pH": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_adjac": { "description": "Description of the environmental features that are adjacent to the farm water source. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "water_source_shared": { "description": "Other users sharing access to the same water source. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "biotic_regm", "chem_administration", "food_product_type", "samp_type", "IFSAC_category" ], "title": "MigsOrgFoodFarmEnvironment", "type": "object" }, "MigsOrgFoodFoodProductionFacility": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the FoodFoodProductionFacility Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). 
Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products). An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "area_samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide_used": { "description": "Substance intended for preventing, neutralizing, destroying, repelling, or mitigating the effects of any pest or microorganism; that inhibits the growth, reproduction, and activity of organisms, including fungal cells; decreases the number of fungi or pests present; deters microbial growth and degradation of other ingredients in the formulation. Indicate the biocide used on the location where the sample was taken. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. 
This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. 
checkm, anvi'o, busco", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). 
Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. 
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_monitoring_zone": { "description": "An environmental monitoring zone is a formal designation as part of an environmental monitoring program, in which areas of a food production facility are categorized, commonly as zones 1-4, based on likelihood or risk of foodborne pathogen contamination. This field accepts terms listed under food production environmental monitoring zone (http://purl.obolibrary.org/obo/ENVO). Please add a term to indicate the environmental monitoring zone the sample was taken from", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. 
Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "facility_type": { "description": "Establishment details about the type of facility where the sample was taken. This is independent of the specific product(s) within the facility", "items": { "$ref": "#/$defs/FACILITYTYPEENUM" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. 
See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. 
http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. 
This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. 
This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, free-range, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. 
This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. 
The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. 
Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hygienic_area": { "description": "The subdivision of areas within a food production facility according to hygienic requirements. This field accepts terms listed under hygienic food production area (http://purl.obolibrary.org/obo/ENVO). Please add a term that most accurately indicates the hygienic area your sample was taken from according to the definitions provided", "type": "string" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. 
This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. 
The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": 
"string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, free-range, industrial, hormone-free, antibiotic free, cage free. 
Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_loc_condition": { "$ref": "#/$defs/SAMPLOCCONDITIONENUM", "description": "The condition of the sample location at the time of sampling" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from the environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. 
Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. 
If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_surf_moisture": { "description": "Degree of water held on a sampled surface. If present, user can state the degree of water held on surface (intermittent moisture, submerged). If no surface moisture is present indicate not present", "items": { "$ref": "#/$defs/SAMPSURFMOISTUREENUM" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. 
Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. 
Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spec_intended_cons": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "ster_meth_samp_room": { "description": "The method used to sterilize the sampling room. This field accepts terms listed under electromagnetic radiation (http://purl.obolibrary.org/obo/ENVO_01001026). If the proper descriptor is not listed, please use text to describe the sampling room sterilization method. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "samp_source_mat_cat", "samp_type", "samp_stor_media", "samp_stor_device", "food_product_type", "IFSAC_category", "food_product_qual", "food_contact_surf" ], "title": "MigsOrgFoodFoodProductionFacility", "type": "object" }, "MigsOrgFoodHumanFoods": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the FoodHumanFoods Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). 
Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, 
active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_chem_add": { "description": "Any chemicals that are added to the fermentation process to achieve the desired final product", "items": { "type": "string" }, "type": "array" }, "ferm_chem_add_perc": { "description": "The amount of chemical added to the fermentation process", "items": { "type": "number" }, "type": "array" }, "ferm_headspace_oxy": { "description": "The amount of headspace oxygen in a fermentation vessel", "type": "number" }, "ferm_medium": { "description": "The growth medium used for the fermented food fermentation process, which supplies the required nutrients. Usually this includes a carbon and nitrogen source, water, micronutrients and chemical additives", "type": "string" }, "ferm_pH": { "description": "The pH of the fermented food fermentation process", "type": "number" }, "ferm_rel_humidity": { "description": "The relative humidity of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_temp": { "description": "The temperature of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_time": { "description": "The time duration of the fermented food fermentation process", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "ferm_vessel": { "description": "The type of vessel used for containment of the fermentation", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. 
This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. 
This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). 
Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. 
This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. 
The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the vehicle. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. 
specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "microb_start": { "description": "Any type of microorganisms used in food production. This field accepts terms listed under live organisms for food production (http://purl.obolibrary.org/obo/FOODON_0344453)", "type": "string" }, "microb_start_count": { "description": "Total cell count of starter culture per gram, volume or area of sample and the method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example : total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "microb_start_inoc": { "description": "The amount of starter culture used to inoculate a new batch", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "microb_start_prep": { "description": "Information about the protocol or method used to prepare the starter inoculum", "type": "string" }, "microb_start_source": { "description": "The source from which the microbial starter culture was sourced. If commercially supplied, list supplier", "type": "string" }, "microb_start_taxID": { "description": "Please include Genus species and strain ID, if known of microorganisms used in food production. 
For complex communities, pipes can be used to separate two or more microbes", "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material", "items": { "type": "string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. 
This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). 
This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "food_product_type", "IFSAC_category" ], "title": "MigsOrgFoodHumanFoods", "type": "object" }, "MigsOrgHostAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the HostAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. 
It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "blood_press_diast": { "description": "Resting diastolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "blood_press_syst": { "description": "Resting systolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, meditteranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 
9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic 
resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. 
This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. 
Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsOrgHostAssociated", "type": "object" }, "MigsOrgHumanAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the HumanAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amniotic_fluid_color": { "description": "Specification of the color of the amniotic fluid sample", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "blood_blood_disord": { "description": "History of blood disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, hematopoietic system disease (https://disease-ontology.org/?id=DOID:74)", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diet_last_six_month": { "description": "Specification of major diet changes in the last six months, if yes the change should be specified", "type": "string" }, "drug_usage": { "description": "Any drug used by subject and the frequency of usage; can include multiple drugs used", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. 
https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "foetal_health_stat": { "description": "Specification of foetal health status, should also include abortion", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gestation_state": { "description": "Specification of the gestation state", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_hiv_stat": { "description": "HIV status of subject, if yes HAART initiation status should also be indicated as [YES or NO]", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "kidney_disord": { "description": "History of kidney disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, kidney disease (https://disease-ontology.org/?id=DOID:557)", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "maternal_health_stat": { "description": "Specification of the maternal health status", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_throat_disord": { "description": "History of nose-throat disorders; can include multiple disorders, The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850), upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pet_farm_animal": { "description": "Specification of presence of pets or farm animals in the environment of subject, if yes the animals should be specified; can include multiple animals present", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "pulmonary_disord": { "description": "History of pulmonary disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850)", "items": { "type": "string" }, "type": "array" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "smoker": { "description": "Specification of smoking status", "type": "boolean" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "study_complt_stat": { "description": "Specification of study completion status, if no the reason should be specified", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "travel_out_six_month": { "description": "Specification of the countries travelled in the last six months; can include multiple travels", "items": { "type": "string" }, "type": "array" }, "twin_sibling": { "description": "Specification of twin sibling presence", "type": "boolean" }, "urine_collect_meth": { "$ref": "#/$defs/URINECOLLECTMETHENUM", "description": "Specification of urine collection method" }, "urogenit_tract_disor": { "description": "History of urogenital tract disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" }, "weight_loss_3_month": { "description": "Specification of weight loss in the last three months, if yes should be further specified to include amount of weight loss", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsOrgHumanAssociated", "type": "object" }, "MigsOrgHumanGut": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the HumanGut Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. 
For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gastrointest_disord": { "description": "History of gastrointestinal tract disorders; can include multiple disorders. History of blood disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, gastrointestinal system disease (https://disease-ontology.org/?id=DOID:77)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. 
Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "liver_disord": { "description": "History of liver disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, liver disease (https://disease-ontology.org/?id=DOID:409)", "items": { "type": "string" }, "type": "array" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from the environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degrees Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "special_diet": { "description": "Specification of special diet; can include multiple special diets", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsOrgHumanGut", "type": "object" }, "MigsOrgHumanOral": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the HumanOral Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. 
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. 
Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung, etc.). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_mouth_teeth_throat_disord": { "description": "History of 
nose/mouth/teeth/throat disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, nose disease (https://disease-ontology.org/?id=DOID:2825), mouth disease (https://disease-ontology.org/?id=DOID:403), tooth disease (https://disease-ontology.org/?id=DOID:1091), or upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from the environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degrees Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. 
Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_last_toothbrush": { "description": "Specification of the time since last toothbrushing", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsOrgHumanOral", "type": "object" }, "MigsOrgHumanSkin": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the HumanSkin Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. 
For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dermatology_disord": { "description": "History of dermatology disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, skin disease (https://disease-ontology.org/?id=DOID:37)", "items": { "type": "string" }, "type": "array" }, "dominant_hand": { "$ref": "#/$defs/DOMINANTHANDENUM", "description": "Dominant hand of the subject" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. 
https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, 
electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. 
Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_since_last_wash": { "description": "Specification of the time since last wash", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsOrgHumanSkin", "type": "object" }, "MigsOrgHumanVaginal": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the HumanVaginal Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "birth_control": { "description": "Specification of birth control medication used", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. 
checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "douche": { "description": "Date of most recent douche", "format": "date-time", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gynecologic_disord": { "description": "History of gynecological disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, female reproductive system disease (https://disease-ontology.org/?id=DOID:229)", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hrt": { "description": "Whether subject had hormone replacement therapy, and if yes start date", "format": "date-time", "type": "string" }, "hysterectomy": { "description": "Specification of whether hysterectomy was performed", "type": "boolean" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "menarche": { "description": "Date of most recent menstruation", "format": "date-time", "type": "string" }, "menopause": { "description": "Date of onset of menopause", "format": "date-time", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the 
material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pregnancy": { "description": "Date due of pregnancy", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. 
It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sexual_act": { "description": "Current sexual partner and frequency of sex", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. 
ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "urogenit_disord": { "description": "History of urogenital disorders, can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, reproductive system disease (https://disease-ontology.org/?id=DOID:15) or urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsOrgHumanVaginal", "type": "object" }, "MigsOrgHydrocarbonResourcesCores": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the HydrocarbonResourcesCores Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. MIC)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). 
We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. 
The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. (Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "owc_tvdss": { "description": "Depth of the original oil water contact (OWC) zone (average) (m TVDSS)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "permeability": { "description": "Measure of the ability of a hydrocarbon resource to allow fluids to pass through it. (Additional information: https://en.wikipedia.org/wiki/Permeability_(earth_sciences))", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_md": { "description": "In non deviated well, measured depth is equal to the true vertical depth, TVD (TVD=TVDSS plus the reference or datum it refers to). In deviated wells, the MD is the length of trajectory of the borehole measured from the same reference or datum. Common datums used are ground level (GL), drilling rig floor (DF), rotary table (RT), kelly bushing (KB) and mean sea level (MSL). If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. 
It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. 
Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_tvdss": { "description": "Depth of the sample i.e. the vertical distance between the sea level and the sampled position in the subsurface. Depth can be reported as an interval for subsurface samples e.g. 1325.75-1362.25 m", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. 
However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sr_dep_env": { "$ref": "#/$defs/SRDEPENVENUM", "description": "Source rock depositional environment (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of source rock (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_kerog_type": { "$ref": "#/$defs/SRKEROGTYPEENUM", "description": "Origin of kerogen. Type I: Algal (aquatic), Type II: planktonic and soft plant material (aquatic or terrestrial), Type III: terrestrial woody/ fibrous plant material (terrestrial), Type IV: oxidized recycled woody debris (terrestrial) (additional information: https://en.wikipedia.org/wiki/Kerogen). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_lithology": { "$ref": "#/$defs/SRLITHOLOGYENUM", "description": "Lithology of source rock (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "hcr", "hc_produced", "basin", "hcr_temp", "sulfate_fw", "vfa_fw", "samp_type", "api" ], "title": "MigsOrgHydrocarbonResourcesCores", "type": "object" }, "MigsOrgHydrocarbonResourcesFluidsSwabs": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the HydrocarbonResourcesFluidsSwabs Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "add_recov_method": { "description": "Additional (i.e. Secondary, tertiary, etc.) recovery methods deployed for increase of hydrocarbon recovery from resource and start date for each one of them. If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide": { "description": "List of biocides (commercial name of product and supplier) and date of administration", "type": "string" }, "biocide_admin_method": { "description": "Method of biocide administration (dose, frequency, duration, time elapsed between last biociding and sampling) (e.g. 150 mg/l; weekly; 4 hr; 3 days)", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_treat_method": { "description": "Method of chemical administration(dose, frequency, duration, time elapsed between administration and sampling) (e.g. 50 mg/l; twice a week; 1 hr; 0 days)", "type": "string" }, "chem_treatment": { "description": "List of chemical compounds administered upstream the sampling location where sampling occurred (e.g. Glycols, H2S scavenger, corrosion and scale inhibitors, demulsifiers, and other production chemicals etc.). 
The commercial name of the product and name of the supplier should be provided. The date of administration should also be included", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. Mic)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. 
Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "iw_bt_date_well": { "description": "Injection water breakthrough date per well following a secondary and/or tertiary recovery", "format": "date-time", "type": "string" }, "iwf": { "description": "Proportion of the produced fluids derived from injected water at the time of sampling. (e.g. 87%)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. (Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_rate": { "description": "Oil and/or gas production rates per well (e.g. 524 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_start_date": { "description": "Date of field's first production", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_point": { "$ref": "#/$defs/SAMPCOLLECTPOINTENUM", "description": "Sampling point on the asset where the sample was collected (e.g. Wellhead, storage tank, separator, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_loc_corr_rate": { "description": "Metal corrosion rate is the speed of metal deterioration due to environmental conditions. As environmental conditions change corrosion rates change accordingly. Therefore, long term corrosion rates are generally more informative than short term rates and for that reason they are preferred during reporting. 
In the case of suspected MIC, corrosion rate measurements at the time of sampling might provide insights into the involvement of certain microbial community members in MIC as well as potential microbial interplays", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_preserv": { "description": "Preservative added to the sample (e.g. Rnalater, alcohol, formaldehyde, etc.). Where appropriate include volume added (e.g. Rnalater; 2 ml)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. 
This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 
1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "water_cut": { "description": "Current amount of water (%) in a produced fluid stream; or the average of the combined streams", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_prod_rate": { "description": "Water production rates per well (e.g. 987 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "hcr", "hc_produced", "basin", "water_cut", "iwf", "add_recov_method", "samp_type", "samp_collect_point", "sulfate", "sulfide", "nitrate", "api" ], "title": "MigsOrgHydrocarbonResourcesFluidsSwabs", "type": "object" }, "MigsOrgMicrobialMatBiofilm": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the MicrobialMatBiofilm Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. 
High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instant (single point in time) or interval. In case no exact time is available, the date/time can be right truncated, i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. 
For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. 
While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. 
ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsOrgMicrobialMatBiofilm", "type": "object" }, "MigsOrgMiscellaneousNaturalOrArtificialEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the MiscellaneousNaturalOrArtificialEnvironment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. 
Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. 
checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subjected, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to samp_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored, written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degrees Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. 
Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsOrgMiscellaneousNaturalOrArtificialEnvironment", "type": "object" }, "MigsOrgPlantAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the PlantAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp_regm": { "description": "Information about treatment involving an exposure to varying temperatures; should include the temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include different temperature regimens", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. 
A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "antibiotic_regm": { "description": "Information about treatment involving antibiotic administration; should include the name of antibiotic, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple antibiotic regimens", "items": { "type": "string" }, "type": "array" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. 
High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_mutagen": { "description": "Treatment involving use of mutagens; should include the name of mutagen, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mutagen regimens", "items": { "type": "string" }, "type": "array" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cult_root_med": { "description": "Name or reference for the hydroponic or in vitro culture rooting medium; can be the name of a commonly used medium or reference to a specific medium, e.g. Murashige and Skoog medium. 
If the medium has not been formally published, use the rooting medium descriptors", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO s biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen s local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. 
UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_regm": { "description": "Information about treatment involving the use of fertilizers; should include the name of fertilizer, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fertilizer regimens", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravity": { "description": "Information about treatment involving use of gravity factor to study various types of responses in presence, absence or modified levels of gravity; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple treatments", "items": { "type": "string" }, "type": "array" }, "growth_facil": { "description": "Type of facility where the sampled plant was grown; controlled vocabulary: growth chamber, open top chamber, glasshouse, experimental garden, field. Alternatively use Crop Ontology (CO) terms, see http://www.cropontology.org/ontology/CO_715/Crop%20Research", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_hormone_regm": { "description": "Information about treatment involving use of growth hormones; should include the name of growth hormone, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple growth hormone regimens", "items": { "type": "string" }, "type": "array" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", 
"items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_wet_mass": { "description": "Measurement of wet mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity_regm": { "description": "Information about treatment involving an exposure to varying degrees of humidity; should include amount of humidity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_regm": { "description": "Information about treatment(s) involving exposure to light, including both light intensity and quality", "type": "string" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "mineral_nutr_regm": { "description": "Information about treatment involving the use of mineral supplements; should include the name of mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of 
plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of pH of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_sex": { "$ref": "#/$defs/PLANTSEXENUM", "description": "Sex of the reproductive parts on the whole plant, e.g. pistillate, staminate, monoecious, hermaphrodite" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. 
petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "radiation_regm": { "description": "Information about treatment involving exposure of plant or a plant part to a particular radiation regimen; should include the radiation type, amount or intensity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple radiation regimens", "items": { "type": "string" }, "type": "array" }, "rainfall_regm": { "description": "Information about treatment involving an exposure to a given amount of rainfall, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N, P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "root_med_regl": { "description": "Growth regulators in the culture rooting medium such as cytokinins, auxins, gibberellins, abscisic acid; e.g. 0.5 mg/L NAA", "type": "string" }, "root_med_solid": { "description": "Specification of the solidifying agent in the culture rooting medium; e.g. agar", "type": "string" }, "root_med_suppl": { "description": "Organic supplements of the culture rooting medium, such as vitamins, amino acids, organic acids, antibiotics, activated charcoal; e.g. nicotinic acid (0.5 mg/L)", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. 
It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tiss_cult_growth_med": { "description": "Description of plant tissue culture growth media used", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsOrgPlantAssociated", "type": "object" }, "MigsOrgSediment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the Sediment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. 
For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. 
Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. 
in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. 
Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "particle_class": { "description": "Particles are classified, based on their size, into six general categories:clay, silt, sand, gravel, cobbles, and boulders; should include amount of particle preceded by the name of the particle type; can include multiple values", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. Maybe be a single taxon or mixed taxa sample. Use 'synthetic metagenome for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sediment_type": { "$ref": "#/$defs/SEDIMENTTYPEENUM", "description": "Information about the sediment type based on major constituents" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. 
Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsOrgSediment", "type": "object" }, "MigsOrgSoil": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the Soil Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "al_sat": { "description": "Aluminum saturation (esp. For tropical soils)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "al_sat_meth": { "description": "Reference or method used in determining Al saturation", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. 
For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. 
The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals": { "description": "Heavy metals present in the sequenced sample and their concentrations. 
For multiple heavy metals and concentrations, add multiple copies of this field", "items": { "type": "string" }, "type": "array" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "link_addit_analys": { "description": "Link to additional analysis results performed on the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", 
"pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_matter": { "description": "Concentration of organic matter", "pattern": 
"^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected.sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", 
"pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. 
Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_cont_soil_meth": { "description": "Reference or method used in determining the water content of soil", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "elev", "env_broad_scale" ], "title": "MigsOrgSoil", "type": "object" }, "MigsOrgSymbiontAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the SymbiontAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "association_duration": { "description": "Time spent in host of the symbiotic organism at the time of sampling; relevant scale depends on symbiotic organism and study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. 
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). 
Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_cellular_loc": { "$ref": "#/$defs/HOSTCELLULARLOCENUM", "description": "The localization of the symbiotic host organism within the host from which it was sampled: e.g. intracellular if the symbiotic host organism is localized within the cells or extracellular if the symbiotic host organism is localized outside of cells" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_dependence": { "$ref": "#/$defs/HOSTDEPENDENCEENUM", "description": "Type of host dependence for the symbiotic host organism to its host" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_infra_spec_name": { "description": "Taxonomic information about the host below subspecies level", "type": "string" }, "host_infra_spec_rank": { "description": "Taxonomic rank information about the host below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_number": { "description": "Number of symbiotic host individuals pooled at the time of collection", "type": "string" }, "host_of_host_coinf": { "description": "The taxonomic name of any coinfecting organism observed in a symbiotic relationship with the host of the sampled host organism. e.g. where a sample collected from a host trematode species (A) which was collected from a host_of_host fish (B) that was also infected with a nematode (C), the value here would be (C) the nematode {species name} or {common name}. Multiple co-infecting species may be added in a comma-separated list. For listing symbiotic organisms associated with the host (A) use the term Observed host symbiont", "type": "string" }, "host_of_host_disease": { "description": "List of diseases with which the host of the symbiotic host organism has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_loc": { "description": "For a symbiotic host organism the local anatomical environment within its host may have causal influences. 
Report the anatomical entity(s) which are in the direct environment of the symbiotic host organism being sampled and which you believe have significant causal influences on your sample or specimen. For example, if the symbiotic host organism being sampled is an intestinal worm, its local environmental context will be the term for intestine from UBERON (http://uberon.github.io/)", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_med": { "description": "Report the environmental material(s) immediately surrounding the symbiotic host organism at the time of sampling. This usually will be a tissue or substance type from the host, but may be another material if the symbiont is external to the host. We recommend using classes from the UBERON ontology, but subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483) may also be used. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
intestines, heart)", "type": "string" }, "host_of_host_fam_rel": { "description": "Familial relationship of the host of the symbiotic host organisms to other hosts of symbiotic host organism in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_of_host_geno": { "description": "Observed genotype of the host of the symbiotic host organism", "type": "string" }, "host_of_host_gravid": { "description": "Whether or not the host of the symbiotic host organism is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_of_host_infname": { "description": "Taxonomic name information of the host of the symbiotic host organism below subspecies level", "type": "string" }, "host_of_host_infrank": { "description": "Taxonomic rank information about the host of the symbiotic host organism below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_of_host_name": { "description": "Common name of the host of the symbiotic host organism", "type": "string" }, "host_of_host_pheno": { "description": "Phenotype of the host of the symbiotic host organism. For phenotypic quality ontology (PATO) terms, see http://purl.bioontology.org/ontology/pato", "type": "string" }, "host_of_host_sub_id": { "description": "A unique identifier by which each host of the symbiotic host organism subject can be referred to, de-identified, e.g. #H14", "type": "string" }, "host_of_host_taxid": { "description": "NCBI taxon id of the host of the symbiotic host organism", "type": "string" }, "host_of_host_totmass": { "description": "Total mass of the host of the symbiotic host organism at collection, the unit depends on the host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_specificity": { "$ref": "#/$defs/HOSTSPECIFICITYENUM", "description": "Level of specificity of symbiont-host interaction: e.g. generalist (symbiont able to establish associations with distantly related hosts) or species-specific" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "mode_transmission": { "$ref": "#/$defs/MODETRANSMISSIONENUM", "description": "The process through which the symbiotic host organism entered the host from which it was sampled" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": 
"string" }, "route_transmission": { "$ref": "#/$defs/ROUTETRANSMISSIONENUM", "description": "Description of path taken by the symbiotic host organism being sampled in order to establish a symbiotic relationship with the host (with which it was observed at the time of sampling) via a mode of transmission (specified in mode_transmission)" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. 
It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_sol": { "description": "Solution within which sample was stored, if any", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. 
Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sym_life_cycle_type": { "$ref": "#/$defs/SYMLIFECYCLETYPEENUM", "description": "Type of life cycle of the symbiotic host species (the thing being sampled). Simple life cycles occur within a single host, complex ones within multiple different hosts over the course of their normal life cycle" }, "symbiont_host_role": { "$ref": "#/$defs/SYMBIONTHOSTROLEENUM", "description": "Role of the host in the life cycle of the symbiotic organism" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type_of_symbiosis": { "$ref": "#/$defs/TYPEOFSYMBIOSISENUM", "description": "Type of biological interaction established between the symbiotic host organism being sampled and its respective host" }, "urobiom_sex": { "$ref": "#/$defs/UROBIOMSEXENUM", "description": "Physical sex of the host" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "host_dependence", "sym_life_cycle_type", "host_life_stage" ], "title": "MigsOrgSymbiontAssociated", "type": "object" }, "MigsOrgWastewaterSludge": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the WastewaterSludge Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biochem_oxygen_dem": { "description": "Amount of dissolved oxygen needed by aerobic biological organisms in a body of water to break down organic material present in a given water sample at certain temperature over a specific time period", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_oxygen_dem": { "description": "A measure of the capacity of water to consume oxygen during the decomposition of organic matter and the oxidation of inorganic chemicals such as ammonia and nitrite", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "efficiency_percent": { "description": "Percentage of volatile solids removed from the anaerobic digestor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "emulsions": { "description": "Amount or concentration of substances such as paints, adhesives, mayonnaise, hair colorants, emulsified oils, etc.; can include multiple emulsion types", "items": { "type": "string" }, "type": "array" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. 
air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gaseous_substances": { "description": "Amount or concentration of substances such as hydrogen sulfide, carbon dioxide, methane, etc.; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "indust_eff_percent": { "description": "Percentage of industrial effluents received by wastewater treatment plant", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "inorg_particles": { "description": "Concentration of particles such as sand, grit, metal particles, ceramics, etc.; can include multiple particles", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_particles": { "description": "Concentration of particles such as faeces, hairs, food, vomit, paper fibers, plant material, humus, etc", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pre_treatment": { "description": "The process of pre-treatment removes materials that can be easily collected from the raw wastewater", "type": "string" }, "primary_treatment": { "description": "The process to produce both a generally homogeneous liquid capable of being treated biologically and a sludge that can be separately treated or processed", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reactor_type": { "description": "Anaerobic digesters can be designed and engineered to operate using a number of different process configurations, as batch or continuous, mesophilic, high solid or low solid, and single stage or multistage", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) 
used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. 
INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "secondary_treatment": { "description": "The process for substantially degrading the biological content of the sewage", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sewage_type": { "description": "Type of wastewater treatment plant as municipal or industrial", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sludge_retent_time": { "description": "The time activated sludge remains in reactor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_inorg_mat": { "description": "Concentration of substances such as ammonia, road-salt, sea-salt, cyanide, hydrogen sulfide, thiocyanates, thiosulfates, etc", "items": { "type": "string" }, "type": "array" }, "soluble_org_mat": { "description": "Concentration of substances such as urea, fruit sugars, soluble proteins, drugs, pharmaceuticals, etc", "items": { "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tertiary_treatment": { "description": "The process providing a final treatment stage to raise the effluent quality before it is discharged to the receiving environment", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wastewater_type": { "description": "The origin of wastewater such as human waste, rainfall, storm drains, etc", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsOrgWastewaterSludge", "type": "object" }, "MigsOrgWater": { "additionalProperties": false, "description": "MIxS data that complies with the MigsOrg checklist and the Water Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. 
High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "atmospheric_data": { "description": "Measurement of atmospheric data; can include multiple data", "items": { "type": "string" }, "type": "array" }, "bac_prod": { "description": "Bacterial production in the water column measured by isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bac_resp": { "description": "Measurement of bacterial respiration in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. 
Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_nitro": { "description": "Concentration of dissolved inorganic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "down_par": { "description": "Visible waveband radiance and irradiance measurements in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fluor": { "description": "Raw or converted fluorescence of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_intensity": { "description": "Measurement of light intensity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "part_org_nitro": { "description": "Concentration of particulate organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "photon_flux": { "description": "Measurement of photon flux", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "primary_prod": { "description": "Measurement of primary production, generally measured as isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_react_phosp": { "description": "Concentration of soluble reactive phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_part_matter": { "description": "Concentration of suspended particulate matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_diss_nitro": { "description": "Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_inorg_nitro": { "description": "Total inorganic nitrogen content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_part_carb": { "description": "Total particulate carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsOrgWater", "type": "object" }, "MigsPlAgriculture": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the Agriculture Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. 
This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. 
A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. 
The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). 
Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). 
If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; information about treatment involving use of growth hormones; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer 
or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area 
of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of ph of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "photosynt_activ": { "description": "Measurement of photosynthetic activity (i.e. leaf gas exchange / chlorophyll fluorescence emissions / reflectance / transpiration) Please also include the term method term detailing the method of activity measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "photosynt_activ_meth": { "description": "Reference or method used in measurement of photosynthetic activity", "items": { "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. 
Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_location": { "description": "The location the sequencing run was performed. 
Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of some soil.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_pH": { "description": "pH of some soil.", "type": "number" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. 
identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "water_pH": { "description": "The pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_shared": { "description": "Other users sharing access to the same water source. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "nucl_acid_amp", "assembly_name", "temp", "nucl_acid_ext", "isol_growth_condt", "propagation", "project_name", "adapters", "assembly_software", "env_local_scale", "specific_host", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "chem_administration", "food_source", "pathogenicity", "samp_store_dur", "samp_store_loc", "host_age", "host_common_name", 
"host_genotype", "host_height", "host_length", "host_life_stage", "host_phenotype", "host_taxid", "host_tot_mass", "pcr_primers", "target_gene", "target_subfragment", "mid", "pcr_cond", "chimera_check", "soil_type", "soil_type_meth", "store_cond", "microbial_biomass", "micro_biomass_meth", "sieving", "pool_dna_extracts" ], "title": "MigsPlAgriculture", "type": "object" }, "MigsPlAir": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the Air Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "barometric_press": { "description": "Force per unit area exerted against a surface by the weight of air above that surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_monoxide": { "description": "Carbon monoxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. 
in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. 
Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "oxygen": { "description": "Oxygen (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pollutants": { "description": "Pollutant types and amounts or concentrations measured at the time of sampling; can report multiple pollutants by entering numeric values preceded by name of pollutant", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from the environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored, written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "volatile_org_comp": { "description": "Concentration of carbon-based chemicals that easily evaporate at room temperature; can report multiple volatile organic compounds by entering numeric values preceded by name of compound", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "alt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsPlAir", "type": "object" }, "MigsPlBuiltEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the BuiltEnvironment Extension", "properties": { "abs_air_humidity": { "description": "Actual mass of water vapor - mh20 - present in the air water vapor mixture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "address": { "description": "The street name and building number where the sampling occurred", "type": "string" }, "adj_room": { "description": "List of rooms (room number, room name) immediately adjacent to the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "aero_struc": { "$ref": "#/$defs/AEROSTRUCENUM", "description": "Aerospace structures typically consist of thin plates with stiffeners for the external surfaces, bulkheads and frames to support the shape and fasteners such as welds, rivets, screws and bolts to hold the components together" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amount_light": { "description": "The unit of illuminance and luminous emittance, measuring luminous flux per unit area", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "arch_struc": { "$ref": "#/$defs/ARCHSTRUCENUM", "description": "An architectural structure is a human-made, free-standing, immobile outdoor construction" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_dew_point": { "description": "The average of dew point measures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "avg_temp": { "description": "The average of temperatures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bathroom_count": { "description": "The number of bathrooms in the building", "type": "integer" }, "bedroom_count": { "description": "The number of bedrooms in the building", "type": "integer" }, "build_docs": { "$ref": "#/$defs/BUILDDOCSENUM", "description": "The building design, construction and operation documents" }, "build_occup_type": { "description": "The primary function for which a building or discrete part of a building is intended to be used", "items": { "$ref": "#/$defs/BUILDOCCUPTYPEENUM" }, "type": "array" }, "building_setting": { "$ref": "#/$defs/BUILDINGSETTINGENUM", "description": "A location (geography) where a building is set" }, "built_struc_age": { "description": "The age of the built structure since construction", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "built_struc_set": { "$ref": "#/$defs/BUILTSTRUCSETENUM", "description": "The characterization of the location of the built structure as high or low human density" }, "built_struc_type": { "description": "A physical structure that is a body or assemblage of bodies in space to form a system capable of supporting loads", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_area": { "description": "The area of the ceiling space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the ceiling at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "ceil_finish_mat": { "$ref": "#/$defs/CEILFINISHMATENUM", "description": "The type of material used to finish a ceiling" }, "ceil_struc": { "$ref": "#/$defs/CEILSTRUCENUM", "description": "The construction format of the ceiling" }, "ceil_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a ceiling surface" }, "ceil_thermal_mass": { "description": "The ability of the ceiling to provide inertia against temperature fluctuations. Generally this means concrete that is exposed. A metal deck that supports a concrete slab will act thermally as long as it is exposed to room air flow", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_type": { "$ref": "#/$defs/CEILTYPEENUM", "description": "The type of ceiling according to the ceiling's appearance or construction" }, "ceil_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the ceiling" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cool_syst_id": { "description": "The cooling system identifier", "type": "integer" }, "date_last_rain": { "description": "The date of the last time it rained", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dew_point": { "description": "The temperature to which a given parcel of humid air must be cooled, at constant barometric pressure, for water vapor to condense into water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_comp_type": { "$ref": "#/$defs/DOORCOMPTYPEENUM", "description": "The composite type of the door" }, "door_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the door" }, "door_direct": { "$ref": "#/$defs/DOORDIRECTENUM", "description": "The direction the door opens" }, "door_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the door in the room" }, "door_mat": { "$ref": "#/$defs/DOORMATENUM", "description": "The material the door is composed of" }, "door_move": { "$ref": "#/$defs/DOORMOVEENUM", "description": "The type of movement of the door" }, "door_size": { "description": "The size of the door", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_type": { "$ref": "#/$defs/DOORTYPEENUM", "description": "The type of door material" }, "door_type_metal": { "$ref": "#/$defs/DOORTYPEMETALENUM", "description": "The type of metal door" }, "door_type_wood": { "description": "The type of wood door", "type": "string" }, "door_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a door" }, "drawings": { "$ref": "#/$defs/DRAWINGSENUM", "description": "The building's architectural drawings; if design is chosen, indicate phase-conceptual, schematic, design development, and construction documents" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elevator": { "description": "The number of elevators within the built structure", "type": "integer" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428.
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "escalator": { "description": "The number of escalators within the built structure", "type": "integer" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "exp_duct": { "description": "The amount of exposed ductwork in the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "exp_pipe": { "description": "The number of exposed pipes in the room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "ext_door": { "description": "The number of exterior doors in the built structure", "type": "integer" }, "ext_wall_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The orientation of the exterior wall" }, "ext_window_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The compass direction the exterior window of the room is facing" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "filter_type": { "description": "A device which removes solid particulates or airborne molecular contaminants", "items": { "$ref": "#/$defs/FILTERTYPEENUM" }, "type": "array" }, "fireplace_type": { "$ref": "#/$defs/FIREPLACETYPEENUM", "description": "A firebox with chimney" }, "floor_age": { "description": "The time period since installment of the carpet or flooring", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_area": { "description": "The area of the floor space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the floor at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "floor_count": { "description": "The number of floors in the building, including basements and mechanical penthouse", "type": "integer" }, "floor_finish_mat": { "description": "The floor covering type; the finished surface that is walked on", "type": "string" }, "floor_struc": { "$ref": "#/$defs/FLOORSTRUCENUM", "description": "Refers to the structural elements and subfloor upon which the finish flooring is installed" }, "floor_thermal_mass": { "description": "The ability of the floor to provide inertia against temperature fluctuations", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_water_mold": { "$ref": "#/$defs/FLOORWATERMOLDENUM", "description": "Signs of the presence of mold or mildew in a room" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "freq_cook": { "description": "The number of times a meal is cooked per week", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "furniture": { "$ref": "#/$defs/FURNITUREENUM", "description": "The types of furniture present in the sampled room" }, "gender_restroom": { "$ref": "#/$defs/GENDERRESTROOMENUM", "description": "The gender type of the restroom" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hall_count": { "description": "The total count of hallways and corridors in the built structure", "type": "integer" }, "handidness": { "$ref": "#/$defs/HANDIDNESSENUM", "description": "The handidness of the individual sampled" }, "heat_cool_type": { "description": "Methods of conditioning or heating a room or building", "items": { "$ref": "#/$defs/HEATCOOLTYPEENUM" }, "type": "array" }, "heat_deliv_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The location of heat delivery within the room" }, "heat_sys_deliv_meth": { "$ref": "#/$defs/HEATSYSDELIVMETHENUM", "description": "The method by which the heat is delivered through the system" }, "heat_system_id": { "description": "The heating system identifier", "type": "integer" }, "height_carper_fiber": { "description": "The average carpet fiber height in the indoor environment", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "indoor_space": { "$ref": "#/$defs/INDOORSPACEENUM", "description": "A distinguishable space within a structure, the purpose for which discrete areas of a building is used" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "inside_lux": { "description": "The recorded value at sampling time (power density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "int_wall_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the wall at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "last_clean": { "description": "The last time the floor was cleaned (swept, mopped, vacuumed)", "format": "date-time", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_type": { "description": "Application of light to achieve some practical or aesthetic effect. Lighting includes the use of both artificial light sources such as lamps and light fixtures, as well as natural illumination by capturing daylight. 
Can also include absence of light", "items": { "$ref": "#/$defs/LIGHTTYPEENUM" }, "type": "array" }, "max_occup": { "description": "The maximum amount of people allowed in the indoor environment", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mech_struc": { "$ref": "#/$defs/MECHSTRUCENUM", "description": "mechanical structure: a moving structure" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "number_pets": { "description": "The number of pets residing in the sampled space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_plants": { "description": "The number of plant(s) in the sampling space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_resident": { "description": "The number of individuals currently occupying in the sampling location", "pattern": "^[1-9][0-9]* 
([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "occup_density_samp": { "description": "Average number of occupants at time of sampling per square footage", "type": "number" }, "occup_document": { "$ref": "#/$defs/OCCUPDOCUMENTENUM", "description": "The type of documentation of occupancy" }, "occup_samp": { "description": "Number of occupants present at time of sample within the given space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field is specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "quad_pos": { "$ref": "#/$defs/QUADPOSENUM", "description": "The quadrant position of the sampling room within the building" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_air_humidity": { "description": "Partial vapor and air pressure, density of the vapor and air, or by the actual mass of the vapor and air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "rel_humidity_out": { "description": "The recorded outside relative humidity value at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_samp_loc": { "$ref": "#/$defs/RELSAMPLOCENUM", "description": "The sampling location within the train car" }, "room_air_exch_rate": { "description": "The rate at which outside air replaces indoor air in a given space", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_architec_elem": { "description": "The unique details and component parts that, together, form the architecture of a distinguishable space within a built structure", "type": "string" }, "room_condt": { "$ref": "#/$defs/ROOMCONDTENUM", "description": "The condition of the room at the time of sampling" }, "room_connected": { "$ref": "#/$defs/ROOMCONNECTEDENUM", "description": "List of rooms connected to the sampling room by a doorway" }, "room_count": { "description": "The total count of rooms in the built structure including all room types", "type": "integer" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "room_door_dist": { "description": "Distance between doors (meters) in the hallway between the sampling room and adjacent rooms", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_door_share": { "description": "List of room(s) (room number, room name) sharing a door with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_hallway": { "description": "List of room(s) (room number, room name) located in the same hallway as sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_loc": { "$ref": "#/$defs/ROOMLOCENUM", "description": "The position of the room within the building" }, "room_moist_dam_hist": { "description": "The history of moisture damage or mold in the past 12 months. Number of events of moisture damage or mold observed", "type": "integer" }, "room_net_area": { "description": "The net floor area of sampling room.
Net area excludes wall thicknesses", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_occup": { "description": "Count of room occupancy at time of sampling", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_samp_pos": { "$ref": "#/$defs/ROOMSAMPPOSENUM", "description": "The horizontal sampling position in the room relative to architectural elements" }, "room_type": { "description": "The main purpose or activity of the sampling room. A room is any distinguishable space within a structure", "type": "string" }, "room_vol": { "description": "Volume of sampling room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_wall_share": { "description": "List of room(s) (room number, room name) sharing a wall with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_window_count": { "description": "Number of windows in the room", "type": "integer" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_sort_meth": { "description": "Method by which samples are sorted; open face filter collecting total suspended particles, prefilter to remove particles larger than X micrometers in diameter, where common values of X would be 10 and 2.5 full size sorting in a cascade impactor", "items": { "type": "string" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_time_out": { "description": "The recent and long term history of outside sampling", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_weather": { "$ref": "#/$defs/SAMPWEATHERENUM", "description": "The weather on the sampling day" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_use": { "$ref": "#/$defs/SEASONUSEENUM", "description": "The seasons the space is occupied" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "shad_dev_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the shading device" }, "shading_device_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the shading device at the time of sampling" }, "shading_device_loc": { "$ref": "#/$defs/SHADINGDEVICELOCENUM", "description": "The location of the shading device in relation to the built structure" }, "shading_device_mat": { "description": "The material the shading device is composed of", "type": "string" }, "shading_device_type": { "$ref": "#/$defs/SHADINGDEVICETYPEENUM", "description": "The type of shading device" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "space_typ_state": { "$ref": "#/$defs/SPACETYPSTATEENUM", "description": "Customary or normal state of the space" }, "specific": { "$ref": "#/$defs/SPECIFICENUM", "description": "The building specifications. If design is chosen, indicate phase: conceptual, schematic, design development, construction documents" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "specific_humidity": { "description": "The mass of water vapour in a unit mass of moist air, usually expressed as grams of vapour per kilogram of air, or, in air conditioning, as grains per pound", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "substructure_type": { "description": "The substructure or under building is that largely hidden section of the building which is built off the foundations to the ground floor level", "items": { "$ref": "#/$defs/SUBSTRUCTURETYPEENUM" }, "type": "array" }, "surf_air_cont": { "description": "Contaminant identified on surface", "items": { "$ref": "#/$defs/SURFAIRCONTENUM" }, "type": "array" }, "surf_humidity": { "description": "Surfaces: water activity as a function of air and material moisture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "surf_moisture": { "description": "Water held on a surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "surf_moisture_ph": { "description": "ph measurement of surface", "type": "number" }, "surf_temp": { "description": "Temperature of the surface at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp_out": { "description": "The recorded temperature value at sampling time outside", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "train_line": { "$ref": "#/$defs/TRAINLINEENUM", "description": "The subway line name" }, "train_stat_loc": { "$ref": "#/$defs/TRAINSTATLOCENUM", "description": "The train station collection location" }, "train_stop_loc": { "$ref": "#/$defs/TRAINSTOPLOCENUM", "description": "The train stop collection location" }, "typ_occup_density": { "description": "Customary or normal density of occupants", "type": "number" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "vis_media": { "description": "The building visual media", "type": "string" }, "wall_area": { "description": "The total area of the sampled room's walls", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_const_type": { "$ref": "#/$defs/WALLCONSTTYPEENUM", "description": "The building class of the wall defined by the composition of the building elements and fire-resistance rating" }, "wall_finish_mat": { "$ref": "#/$defs/WALLFINISHMATENUM", "description": "The material utilized to finish the outer most layer of the wall" }, "wall_height": { "description": "The average height of the walls in the sampled room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the wall within the room" }, "wall_surf_treatment": { "$ref": "#/$defs/WALLSURFTREATMENTENUM", "description": "The surface treatment of interior wall" }, "wall_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a wall surface" }, "wall_thermal_mass": { "description": "The ability of the wall to provide inertia against temperature fluctuations. Generally this means concrete or concrete block that is either exposed or covered only with paint", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a wall" }, "water_feat_size": { "description": "The size of the water feature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_feat_type": { "$ref": "#/$defs/WATERFEATTYPEENUM", "description": "The type of water feature present within the building being sampled" }, "weekday": { "$ref": "#/$defs/WEEKDAYENUM", "description": "The day of the week when sampling occurred" }, "window_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the window at the time of sampling" }, "window_cover": { "$ref": "#/$defs/WINDOWCOVERENUM", "description": "The type of window covering" }, "window_horiz_pos": { "$ref": "#/$defs/WINDOWHORIZPOSENUM", "description": "The horizontal position of the window on the wall" }, "window_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the window within the room" }, "window_mat": { "$ref": "#/$defs/WINDOWMATENUM", "description": "The type of material used to finish a window" }, "window_open_freq": { "description": "The number of times windows are opened per week", "type": "integer" }, "window_size": { "description": "The window's length and width", "type": "string" }, "window_status": { "$ref": "#/$defs/WINDOWSTATUSENUM", "description": "Defines whether the windows were open or closed during environmental testing" }, "window_type": { "$ref": "#/$defs/WINDOWTYPEENUM", "description": "The type of windows" }, "window_vert_pos": { "$ref": "#/$defs/WINDOWVERTPOSENUM", "description": "The vertical position of the window on the wall" }, "window_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the window" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "surf_material", "rel_air_humidity", "abs_air_humidity", "air_temp", "build_occup_type", "carb_dioxide", "ventilation_type", "organism_count", "indoor_space", "indoor_surf", 
"filter_type", "heat_cool_type", "building_setting", "light_type", "space_typ_state", "typ_occup_density", "occup_samp", "occup_density_samp" ], "title": "MigsPlBuiltEnvironment", "type": "object" }, "MigsPlFoodAnimalAndAnimalFeed": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the FoodAnimalAndAnimalFeed Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "animal_am": { "description": "The name(s) (generic or brand) of the antimicrobial(s) given to the food animal within the last 30 days", "type": "string" }, "animal_am_dur": { "description": "The duration of time (days) that the antimicrobial was administered to the food animal", "type": "string" }, "animal_am_freq": { "description": "The frequency per day that the antimicrobial was administered to the food animal", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "animal_am_route": { "description": "The route by which the antimicrobial is administered into the body of the food animal", "type": "string" }, "animal_am_use": { "description": "The prescribed intended use of or the condition treated by the antimicrobial given to the food animal by any route of administration", "type": "string" }, "animal_body_cond": { "$ref": "#/$defs/ANIMALBODYCONDENUM", "description": "Body condition scoring is a production management tool used to evaluate overall health and nutritional needs of a food animal. Because there are different scoring systems, this field is restricted to three categories" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. 
Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes (|). Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. 
This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_sex": { "$ref": "#/$defs/ANIMALSEXENUM", "description": "The sex and reproductive status of the food animal" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, 
active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). 
Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. 
air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. 
This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. 
http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that are not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. 
This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). 
Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_source_age": { "description": "The age of the food source host organism. Depending on the type of host organism, age may be more appropriate to report in days, weeks, or years", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. 
This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. 
The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, 
"organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. 
It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. 
This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. 
Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. 
Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. 
However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. 
This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "food_origin", "food_prod", "food_product_type", "IFSAC_category", "intended_consumer", "samp_purpose" ], "title": "MigsPlFoodAnimalAndAnimalFeed", "type": "object" }, "MigsPlFoodFarmEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the FoodFarmEnvironment Extension", "properties": { "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. 
An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "air_flow_impede": { "description": "Presence of objects in the area that would influence or impede air flow through the air filter", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. 
parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes |Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. 
This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_intrusion": { "description": "Identification of animals intruding on the sample or sample site including invertebrates (such as pests or pollinators) and vertebrates (such as wildlife or domesticated animals). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "date_extr_weath": { "description": "Date of unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "format": "date-time", "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extr_weather_event": { "description": "Unusual weather events that may have affected microbial populations. 
Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "$ref": "#/$defs/EXTRWEATHEREVENTENUM" }, "type": "array" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_san_freq": { "description": "The number of times farm equipment is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually", "type": "string" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fertilizer_date": { "description": "Date of administration of soil amendment or fertilizer. Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "format": "date-time", "type": "string" }, "food_clean_proc": { "$ref": "#/$defs/FOODCLEANPROCENUM", "description": "The process of cleaning food to separate other environmental materials from the food source. Multiple terms can be separated by pipes" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. 
Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. 
This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. Unless used on infant formula, this date is not a reflection of safety; it is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_medium": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). The name of the medium used to grow the microorganism", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. 
This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. 
The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_part_maturity": { "description": "A description of the stage of development of a plant or plant part based on maturity or ripeness. 
This field accepts terms listed under degree of plant maturity (http://purl.obolibrary.org/obo/FOODON_03530050)", "type": "string" }, "plant_reprod_crop": { "description": "Plant reproductive part used in the field during planting to start the crop", "items": { "$ref": "#/$defs/PLANTREPRODCROPENUM" }, "type": "array" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free. Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 
5.5", "type": "number" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. 
Use 'synthetic metagenome for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of soil at time of sampling.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_pH": { "description": "The pH of soil at time of sampling.", "type": "number" }, "soil_porosity": { "description": "Porosity of soil or deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_class": { "$ref": "#/$defs/SOILTEXTURECLASSENUM", "description": "One of the 12 soil texture classes used to describe soil texture based on the relative proportion of different grain sizes of mineral particles [sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um)] in a soil" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. 
This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "water_frequency": { "description": "Number of water delivery events within a given period of time", "type": "string" }, "water_pH": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_adjac": { "description": "Description of the environmental features that are adjacent to the farm water source. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "water_source_shared": { "description": "Other users sharing access to the same water source. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "biotic_regm", "chem_administration", "food_product_type", "samp_type", "IFSAC_category" ], "title": "MigsPlFoodFarmEnvironment", "type": "object" }, "MigsPlFoodFoodProductionFacility": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the FoodFoodProductionFacility Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products). 
An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "area_samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. 
For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_occup": { "description": "Daily average occupancy of room. 
Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide_used": { "description": "Substance intended for preventing, neutralizing, destroying, repelling, or mitigating the effects of any pest or microorganism; that inhibits the growth, reproduction, and activity of organisms, including fungal cells; decreases the number of fungi or pests present; deters microbial growth and degradation of other ingredients in the formulation. Indicate the biocide used on the location where the sample was taken. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. 
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_monitoring_zone": { "description": "An environmental monitoring zone is a formal designation as part of an environmental monitoring program, in which areas of a food production facility are categorized, commonly as zones 1-4, based on likelihood or risk of foodborne pathogen contamination. This field accepts terms listed under food production environmental monitoring zone (http://purl.obolibrary.org/obo/ENVO). Please add a term to indicate the environmental monitoring zone the sample was taken from", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "facility_type": { "description": "Establishment details about the type of facility where the sample was taken. 
This is independent of the specific product(s) within the facility", "items": { "$ref": "#/$defs/FACILITYTYPEENUM" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. 
This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. 
Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. 
It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). 
Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. Unless used on infant formula, this date is not a reflection of safety; it is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. 
This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "hygienic_area": { "description": "The subdivision of areas within a food production facility according to hygienic requirements. This field accepts terms listed under hygienic food production area (http://purl.obolibrary.org/obo/ENVO). Please add a term that most accurately indicates the hygienic area your sample was taken from according to the definitions provided", "type": "string" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": "string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free. Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_loc_condition": { "$ref": "#/$defs/SAMPLOCCONDITIONENUM", "description": "The condition of the sample location at the time of sampling" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. 
Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. 
If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_surf_moisture": { "description": "Degree of water held on a sampled surface. If present, user can state the degree of water held on surface (intermittent moisture, submerged). If no surface moisture is present indicate not present", "items": { "$ref": "#/$defs/SAMPSURFMOISTUREENUM" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. 
Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. 
Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spec_intended_cons": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "ster_meth_samp_room": { "description": "The method used to sterilize the sampling room. This field accepts terms listed under electromagnetic radiation (http://purl.obolibrary.org/obo/ENVO_01001026). If the proper descriptor is not listed, please use text to describe the sampling room sterilization method. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "samp_source_mat_cat", "samp_type", "samp_stor_media", "samp_stor_device", "food_product_type", "IFSAC_category", "food_product_qual", "food_contact_surf" ], "title": "MigsPlFoodFoodProductionFacility", "type": "object" }, "MigsPlFoodHumanFoods": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the FoodHumanFoods Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms 
listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, 
active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_chem_add": { "description": "Any chemicals that are added to the fermentation process to achieve the desired final product", "items": { "type": "string" }, "type": "array" }, "ferm_chem_add_perc": { "description": "The amount of chemical added to the fermentation process", "items": { "type": "number" }, "type": "array" }, "ferm_headspace_oxy": { "description": "The amount of headspace oxygen in a fermentation vessel", "type": "number" }, "ferm_medium": { "description": "The growth medium used for the fermented food fermentation process, which supplies the required nutrients. Usually this includes a carbon and nitrogen source, water, micronutrients and chemical additives", "type": "string" }, "ferm_pH": { "description": "The pH of the fermented food fermentation process", "type": "number" }, "ferm_rel_humidity": { "description": "The relative humidity of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_temp": { "description": "The temperature of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_time": { "description": "The time duration of the fermented food fermentation process", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "ferm_vessel": { "description": "The type of vessel used for containment of the fermentation", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. 
This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. 
This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). 
Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. 
This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. 
The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. 
specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "microb_start": { "description": "Any type of microorganisms used in food production. This field accepts terms listed under live organisms for food production (http://purl.obolibrary.org/obo/FOODON_0344453)", "type": "string" }, "microb_start_count": { "description": "Total cell count of starter culture per gram, volume or area of sample and the method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example : total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "microb_start_inoc": { "description": "The amount of starter culture used to inoculate a new batch", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "microb_start_prep": { "description": "Information about the protocol or method used to prepare the starter inoculum", "type": "string" }, "microb_start_source": { "description": "The source from which the microbial starter culture was sourced. If commercially supplied, list supplier", "type": "string" }, "microb_start_taxID": { "description": "Please include Genus species and strain ID, if known of microorganisms used in food production. 
For complex communities, pipes can be used to separate two or more microbes", "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": "string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. 
For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from the environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. 
It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. 
This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. 
Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. 
Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. 
However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. 
This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "food_product_type", "IFSAC_category" ], "title": "MigsPlFoodHumanFoods", "type": "object" }, "MigsPlHostAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the HostAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. 
A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling >= 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "blood_press_diast": { "description": "Resting diastolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "blood_press_syst": { "description": "Resting systolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. 
in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. 
Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field is specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsPlHostAssociated", "type": "object" }, "MigsPlHumanAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the HumanAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amniotic_fluid_color": { "description": "Specification of the color of the amniotic fluid sample", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "blood_blood_disord": { "description": "History of blood disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, hematopoietic system disease (https://disease-ontology.org/?id=DOID:74)", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diet_last_six_month": { "description": "Specification of major diet changes in the last six months, if yes the change should be specified", "type": "string" }, "drug_usage": { "description": "Any drug used by subject and the frequency of usage; can include multiple drugs used", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "foetal_health_stat": { "description": "Specification of foetal health status, should also include abortion", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gestation_state": { "description": "Specification of the gestation state", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_hiv_stat": { "description": "HIV status of subject, if yes HAART initiation status should also be indicated as [YES or NO]", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "kidney_disord": { "description": "History of kidney disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, kidney disease (https://disease-ontology.org/?id=DOID:557)", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "maternal_health_stat": { "description": "Specification of the maternal health status", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_throat_disord": { "description": "History of nose-throat disorders; can include multiple disorders, The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850), upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", 
"items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pet_farm_animal": { "description": "Specification of presence of pets or farm animals in the environment of subject, if yes the animals should be specified; can include multiple animals present", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "pulmonary_disord": { "description": "History of pulmonary disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850)", "items": { "type": "string" }, "type": "array" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "smoker": { "description": "Specification of smoking status", "type": "boolean" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "study_complt_stat": { "description": "Specification of study completion status, if no the reason should be specified", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "travel_out_six_month": { "description": "Specification of the countries travelled in the last six months; can include multiple travels", "items": { "type": "string" }, "type": "array" }, "twin_sibling": { "description": "Specification of twin sibling presence", "type": "boolean" }, "urine_collect_meth": { "$ref": "#/$defs/URINECOLLECTMETHENUM", "description": "Specification of urine collection method" }, "urogenit_tract_disor": { "description": "History of urogenital tract disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" }, "weight_loss_3_month": { "description": "Specification of weight loss in the last three months, if yes should be further specified to include amount of weight loss", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsPlHumanAssociated", "type": "object" }, "MigsPlHumanGut": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the HumanGut Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. 
For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gastrointest_disord": { "description": "History of gastrointestinal tract disorders; can include multiple disorders. History of blood disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, gastrointestinal system disease (https://disease-ontology.org/?id=DOID:77)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. 
Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "liver_disord": { "description": "History of liver disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, liver disease (https://disease-ontology.org/?id=DOID:409)", "items": { "type": "string" }, "type": "array" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "special_diet": { "description": "Specification of special diet; can include multiple special diets", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsPlHumanGut", "type": "object" }, "MigsPlHumanOral": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the HumanOral Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. 
https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. 
Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, meditteranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_mouth_teeth_throat_disord": { "description": "History of 
nose/mouth/teeth/throat disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, nose disease (https://disease-ontology.org/?id=DOID:2825), mouth disease (https://disease-ontology.org/?id=DOID:403), tooth disease (https://disease-ontology.org/?id=DOID:1091), or upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field is specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_last_toothbrush": { "description": "Specification of the time since last toothbrushing", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsPlHumanOral", "type": "object" }, "MigsPlHumanSkin": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the HumanSkin Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. 
For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dermatology_disord": { "description": "History of dermatology disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, skin disease (https://disease-ontology.org/?id=DOID:37)", "items": { "type": "string" }, "type": "array" }, "dominant_hand": { "$ref": "#/$defs/DOMINANTHANDENUM", "description": "Dominant hand of the subject" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. 
https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. 
Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, meditteranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, 
electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_since_last_wash": { "description": "Specification of the time since last wash", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsPlHumanSkin", "type": "object" }, "MigsPlHumanVaginal": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the HumanVaginal Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "birth_control": { "description": "Specification of birth control medication used", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. 
checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "douche": { "description": "Date of most recent douche", "format": "date-time", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. 
https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gynecologic_disord": { "description": "History of gynecological disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, female reproductive system disease (https://disease-ontology.org/?id=DOID:229)", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hrt": { "description": "Whether subject had hormone replacement therapy, and if yes, the start date", "format": "date-time", "type": "string" }, "hysterectomy": { "description": "Specification of whether hysterectomy was performed", "type": "boolean" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "menarche": { "description": "Date of most recent menstruation", "format": "date-time", "type": "string" }, "menopause": { "description": "Date of onset of menopause", "format": "date-time", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the 
material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pregnancy": { "description": "Due date of pregnancy", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or a mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sexual_act": { "description": "Current sexual partner and frequency of sex", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "urogenit_disord": { "description": "History of urogenital disorders, can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, reproductive system disease (https://disease-ontology.org/?id=DOID:15) or urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsPlHumanVaginal", "type": "object" }, "MigsPlHydrocarbonResourcesCores": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the HydrocarbonResourcesCores Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. Mic)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from.
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing.
Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. 
The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. 
(Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "owc_tvdss": { "description": "Depth of the original oil water contact (OWC) zone (average) (m TVDSS)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "permeability": { "description": "Measure of the ability of a hydrocarbon resource to allow fluids to pass through it. (Additional information: https://en.wikipedia.org/wiki/Permeability_(earth_sciences))", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity.
There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_md": { "description": "In non deviated well, measured depth is equal to the true vertical depth, TVD (TVD=TVDSS plus the reference or datum it refers to). 
In deviated wells, the MD is the length of trajectory of the borehole measured from the same reference or datum. Common datums used are ground level (GL), drilling rig floor (DF), rotary table (RT), kelly bushing (KB) and mean sea level (MSL). If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_tvdss": { "description": "Depth of the sample, i.e. the vertical distance between the sea level and the sampled position in the subsurface. Depth can be reported as an interval for subsurface samples e.g. 1325.75-1362.25 m", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "sr_dep_env": { "$ref": "#/$defs/SRDEPENVENUM", "description": "Source rock depositional environment (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of source rock (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_kerog_type": { "$ref": "#/$defs/SRKEROGTYPEENUM", "description": "Origin of kerogen. 
Type I: Algal (aquatic), Type II: planktonic and soft plant material (aquatic or terrestrial), Type III: terrestrial woody/ fibrous plant material (terrestrial), Type IV: oxidized recycled woody debris (terrestrial) (additional information: https://en.wikipedia.org/wiki/Kerogen). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_lithology": { "$ref": "#/$defs/SRLITHOLOGYENUM", "description": "Lithology of source rock (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. 
Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "hcr", "hc_produced", "basin", "hcr_temp", "sulfate_fw", "vfa_fw", "samp_type", "api" ], "title": "MigsPlHydrocarbonResourcesCores", "type": "object" }, "MigsPlHydrocarbonResourcesFluidsSwabs": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the HydrocarbonResourcesFluidsSwabs Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "add_recov_method": { "description": "Additional (i.e. Secondary, tertiary, etc.) recovery methods deployed for increase of hydrocarbon recovery from resource and start date for each one of them. 
If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide": { "description": "List of biocides (commercial name of product and supplier) and date of administration", "type": "string" }, "biocide_admin_method": { "description": "Method of biocide administration (dose, frequency, duration, time elapsed between last biociding and sampling) (e.g. 150 mg/l; weekly; 4 hr; 3 days)", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_treat_method": { "description": "Method of chemical administration(dose, frequency, duration, time elapsed between administration and sampling) (e.g. 50 mg/l; twice a week; 1 hr; 0 days)", "type": "string" }, "chem_treatment": { "description": "List of chemical compounds administered upstream the sampling location where sampling occurred (e.g. Glycols, H2S scavenger, corrosion and scale inhibitors, demulsifiers, and other production chemicals etc.). The commercial name of the product and name of the supplier should be provided. The date of administration should also be included", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. 
checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. Mic)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). 
We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "iw_bt_date_well": { "description": "Injection water breakthrough date per well following a secondary and/or tertiary recovery", "format": "date-time", "type": "string" }, "iwf": { "description": "Proportion of the produced fluids derived from injected water at the time of sampling. (e.g. 
87%)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. 
(Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (soure: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_rate": { "description": "Oil and/or gas production rates per well (e.g. 524 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_start_date": { "description": "Date of field's first production", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field is specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_point": { "$ref": "#/$defs/SAMPCOLLECTPOINTENUM", "description": "Sampling point on the asset where the sample was collected (e.g. Wellhead, storage tank, separator, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_loc_corr_rate": { "description": "Metal corrosion rate is the speed of metal deterioration due to environmental conditions. As environmental conditions change corrosion rates change accordingly. 
Therefore, long term corrosion rates are generally more informative than short term rates and for that reason they are preferred during reporting. In the case of suspected MIC, corrosion rate measurements at the time of sampling might provide insights into the involvement of certain microbial community members in MIC as well as potential microbial interplays", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_preserv": { "description": "Preservative added to the sample (e.g. Rnalater, alcohol, formaldehyde, etc.). Where appropriate include volume added (e.g. Rnalater; 2 ml)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored, written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. 
This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 
1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "water_cut": { "description": "Current amount of water (%) in a produced fluid stream; or the average of the combined streams", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_prod_rate": { "description": "Water production rates per well (e.g. 987 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "hcr", "hc_produced", "basin", "water_cut", "iwf", "add_recov_method", "samp_type", "samp_collect_point", "sulfate", "sulfide", "nitrate", "api" ], "title": "MigsPlHydrocarbonResourcesFluidsSwabs", "type": "object" }, "MigsPlMicrobialMatBiofilm": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the MicrobialMatBiofilm Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. 
High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. 
For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. 
UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from the environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored, written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degrees Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsPlMicrobialMatBiofilm", "type": "object" }, "MigsPlMiscellaneousNaturalOrArtificialEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the MiscellaneousNaturalOrArtificialEnvironment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. 
Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. 
checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g.
UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsPlMiscellaneousNaturalOrArtificialEnvironment", "type": "object" }, "MigsPlPlantAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the PlantAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp_regm": { "description": "Information about treatment involving an exposure to varying temperatures; should include the temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include different temperature regimens", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. 
A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "antibiotic_regm": { "description": "Information about treatment involving antibiotic administration; should include the name of antibiotic, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple antibiotic regimens", "items": { "type": "string" }, "type": "array" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. 
High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_mutagen": { "description": "Treatment involving use of mutagens; should include the name of mutagen, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mutagen regimens", "items": { "type": "string" }, "type": "array" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cult_root_med": { "description": "Name or reference for the hydroponic or in vitro culture rooting medium; can be the name of a commonly used medium or reference to a specific medium, e.g. Murashige and Skoog medium. 
If the medium has not been formally published, use the rooting medium descriptors", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. 
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_regm": { "description": "Information about treatment involving the use of fertilizers; should include the name of fertilizer, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fertilizer regimens", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. 
specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravity": { "description": "Information about treatment involving use of gravity factor to study various types of responses in presence, absence or modified levels of gravity; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple treatments", "items": { "type": "string" }, "type": "array" }, "growth_facil": { "description": "Type of facility where the sampled plant was grown; controlled vocabulary: growth chamber, open top chamber, glasshouse, experimental garden, field. 
Alternatively use Crop Ontology (CO) terms, see http://www.cropontology.org/ontology/CO_715/Crop%20Research", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_hormone_regm": { "description": "Information about treatment involving use of growth hormones; should include the name of growth hormone, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple growth hormone regimens", "items": { "type": "string" }, "type": "array" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_wet_mass": { "description": "Measurement of wet mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity_regm": { "description": "Information about treatment involving an exposure to varying degrees of humidity; should include amount of humidity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_regm": { "description": "Information about treatment(s) involving exposure to light, including both light intensity and quality", "type": "string" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "mineral_nutr_regm": { "description": "Information about treatment involving the use of mineral supplements; should include the name of mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of 
plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of pH of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_sex": { "$ref": "#/$defs/PLANTSEXENUM", "description": "Sex of the reproductive parts on the whole plant, e.g. pistillate, staminate, monoecious, hermaphrodite" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. 
petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "radiation_regm": { "description": "Information about treatment involving exposure of plant or a plant part to a particular radiation regimen; should include the radiation type, amount or intensity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple radiation regimens", "items": { "type": "string" }, "type": "array" }, "rainfall_regm": { "description": "Information about treatment involving an exposure to a given amount of rainfall, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF 
annotation, along with version number and reference to website or publication", "type": "string" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "root_med_regl": { "description": "Growth regulators in the culture rooting medium such as cytokinins, auxins, gibberellins, abscisic acid; e.g. 0.5 mg/L NAA", "type": "string" }, "root_med_solid": { "description": "Specification of the solidifying agent in the culture rooting medium; e.g. agar", "type": "string" }, "root_med_suppl": { "description": "Organic supplements of the culture rooting medium, such as vitamins, amino acids, organic acids, antibiotics, activated charcoal; e.g. nicotinic acid (0.5 mg/L)", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. 
inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. 
May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tiss_cult_growth_med": { "description": "Description of plant tissue culture growth media used", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsPlPlantAssociated", "type": "object" }, "MigsPlSediment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the Sediment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. 
For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. 
Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "particle_class": { "description": "Particles are classified, based on their size, into six general categories: clay, silt, sand, gravel, cobbles, and boulders; should include amount of particle preceded by the name of the particle type; can include multiple values", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sediment_type": { "$ref": "#/$defs/SEDIMENTTYPEENUM", "description": "Information about the sediment type based on major constituents" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsPlSediment", "type": "object" }, "MigsPlSoil": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the Soil Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "al_sat": { "description": "Aluminum saturation (esp. for tropical soils)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "al_sat_meth": { "description": "Reference or method used in determining Al saturation", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. 
checkm, anvi'o, busco", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. 
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. 
Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals": { "description": "Heavy metals present in the sequenced sample and their concentrations. 
For multiple heavy metals and concentrations, add multiple copies of this field", "items": { "type": "string" }, "type": "array" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "link_addit_analys": { "description": "Link to additional analysis results performed on the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", 
"pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_matter": { "description": "Concentration of organic matter", "pattern": 
"^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected.sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field is specific to different taxa. 
For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_cont_soil_meth": { "description": "Reference or method used in determining the water content of soil", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "elev", "env_broad_scale" ], "title": "MigsPlSoil", "type": "object" }, "MigsPlSymbiontAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the SymbiontAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. 
It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "association_duration": { "description": "Time spent in host of the symbiotic organism at the time of sampling; relevant scale depends on symbiotic organism and study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). 
Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_cellular_loc": { "$ref": "#/$defs/HOSTCELLULARLOCENUM", "description": "The localization of the symbiotic host organism within the host from which it was sampled: e.g. intracellular if the symbiotic host organism is localized within the cells or extracellular if the symbiotic host organism is localized outside of cells" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_dependence": { "$ref": "#/$defs/HOSTDEPENDENCEENUM", "description": "Type of host dependence for the symbiotic host organism to its host" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_infra_spec_name": { "description": "Taxonomic information about the host below subspecies level", "type": "string" }, "host_infra_spec_rank": { "description": "Taxonomic rank information about the host below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_number": { "description": "Number of symbiotic host individuals pooled at the time of collection", "type": "string" }, "host_of_host_coinf": { "description": "The taxonomic name of any coinfecting organism observed in a symbiotic relationship with the host of the sampled host organism. e.g. where a sample collected from a host trematode species (A) which was collected from a host_of_host fish (B) that was also infected with a nematode (C), the value here would be (C) the nematode {species name} or {common name}. Multiple co-infecting species may be added in a comma-separated list. For listing symbiotic organisms associated with the host (A) use the term Observed host symbiont", "type": "string" }, "host_of_host_disease": { "description": "List of diseases with which the host of the symbiotic host organism has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_loc": { "description": "For a symbiotic host organism the local anatomical environment within its host may have causal influences. 
Report the anatomical entity(s) which are in the direct environment of the symbiotic host organism being sampled and which you believe have significant causal influences on your sample or specimen. For example, if the symbiotic host organism being sampled is an intestinal worm, its local environmental context will be the term for intestine from UBERON (http://uberon.github.io/)", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_med": { "description": "Report the environmental material(s) immediately surrounding the symbiotic host organism at the time of sampling. This usually will be a tissue or substance type from the host, but may be another material if the symbiont is external to the host. We recommend using classes from the UBERON ontology, but subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483) may also be used. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
intestines, heart)", "type": "string" }, "host_of_host_fam_rel": { "description": "Familial relationship of the host of the symbiotic host organisms to other hosts of symbiotic host organism in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_of_host_geno": { "description": "Observed genotype of the host of the symbiotic host organism", "type": "string" }, "host_of_host_gravid": { "description": "Whether or not the host of the symbiotic host organism is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_of_host_infname": { "description": "Taxonomic name information of the host of the symbiotic host organism below subspecies level", "type": "string" }, "host_of_host_infrank": { "description": "Taxonomic rank information about the host of the symbiotic host organism below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_of_host_name": { "description": "Common name of the host of the symbiotic host organism", "type": "string" }, "host_of_host_pheno": { "description": "Phenotype of the host of the symbiotic host organism. For phenotypic quality ontology (PATO) terms, see http://purl.bioontology.org/ontology/pato", "type": "string" }, "host_of_host_sub_id": { "description": "A unique identifier by which each host of the symbiotic host organism subject can be referred to, de-identified, e.g. #H14", "type": "string" }, "host_of_host_taxid": { "description": "NCBI taxon id of the host of the symbiotic host organism", "type": "string" }, "host_of_host_totmass": { "description": "Total mass of the host of the symbiotic host organism at collection, the unit depends on the host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_specificity": { "$ref": "#/$defs/HOSTSPECIFICITYENUM", "description": "Level of specificity of symbiont-host interaction: e.g. generalist (symbiont able to establish associations with distantly related hosts) or species-specific" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "mode_transmission": { "$ref": "#/$defs/MODETRANSMISSIONENUM", "description": "The process through which the symbiotic host organism entered the host from which it was sampled" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that 
describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field is specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "route_transmission": { "$ref": "#/$defs/ROUTETRANSMISSIONENUM", "description": "Description of path taken by the symbiotic host organism being sampled in order to establish a symbiotic relationship with the host (with which it was observed at the time of sampling) via a mode of transmission (specified in mode_transmission)" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. 
More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_sol": { "description": "Solution within which sample was stored, if any", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sym_life_cycle_type": { "$ref": "#/$defs/SYMLIFECYCLETYPEENUM", "description": "Type of life cycle of the symbiotic host species (the thing being sampled). Simple life cycles occur within a single host, complex ones within multiple different hosts over the course of their normal life cycle" }, "symbiont_host_role": { "$ref": "#/$defs/SYMBIONTHOSTROLEENUM", "description": "Role of the host in the life cycle of the symbiotic organism" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type_of_symbiosis": { "$ref": "#/$defs/TYPEOFSYMBIOSISENUM", "description": "Type of biological interaction established between the symbiotic host organism being sampled and its respective host" }, "urobiom_sex": { "$ref": "#/$defs/UROBIOMSEXENUM", "description": "Physical sex of the host" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "host_dependence", "sym_life_cycle_type", "host_life_stage" ], "title": "MigsPlSymbiontAssociated", "type": "object" }, "MigsPlWastewaterSludge": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the WastewaterSludge Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biochem_oxygen_dem": { "description": "Amount of dissolved oxygen needed by aerobic biological organisms in a body of water to break down organic material present in a given water sample at certain temperature over a specific time period", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_oxygen_dem": { "description": "A measure of the capacity of water to consume oxygen during the decomposition of organic matter and the oxidation of inorganic chemicals such as ammonia and nitrite", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "efficiency_percent": { "description": "Percentage of volatile solids removed from the anaerobic digestor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "emulsions": { "description": "Amount or concentration of substances such as paints, adhesives, mayonnaise, hair colorants, emulsified oils, etc.; can include multiple emulsion types", "items": { "type": "string" }, "type": "array" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gaseous_substances": { "description": "Amount or concentration of substances such as hydrogen sulfide, carbon dioxide, methane, etc.; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "indust_eff_percent": { "description": "Percentage of industrial effluents received by wastewater treatment plant", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "inorg_particles": { "description": "Concentration of particles such as sand, grit, metal particles, ceramics, etc.; can include multiple particles", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_particles": { "description": "Concentration of particles such as faeces, hairs, food, vomit, paper fibers, plant material, humus, etc", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pre_treatment": { "description": "The process of pre-treatment removes materials that can be easily collected from the raw wastewater", "type": "string" }, "primary_treatment": { "description": "The process to produce both a generally homogeneous liquid capable of being treated biologically and a sludge that can be separately treated or processed", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field is specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "reactor_type": { "description": "Anaerobic digesters can be designed and engineered to operate using a number of different process configurations, as batch or continuous, mesophilic, high solid or low solid, and single stage or multistage", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degrees Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "secondary_treatment": { "description": "The process for substantially degrading the biological content of the sewage", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sewage_type": { "description": "Type of wastewater treatment plant as municipal or industrial", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sludge_retent_time": { "description": "The time activated sludge remains in reactor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_inorg_mat": { "description": "Concentration of substances such as ammonia, road-salt, sea-salt, cyanide, hydrogen sulfide, thiocyanates, thiosulfates, etc", "items": { "type": "string" }, "type": "array" }, "soluble_org_mat": { "description": "Concentration of substances such as urea, fruit sugars, soluble proteins, drugs, pharmaceuticals, etc", "items": { "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14', referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tertiary_treatment": { "description": "The process providing a final treatment stage to raise the effluent quality before it is discharged to the receiving environment", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wastewater_type": { "description": "The origin of wastewater such as human waste, rainfall, storm drains, etc", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsPlWastewaterSludge", "type": "object" }, "MigsPlWater": { "additionalProperties": false, "description": "MIxS data that complies with the MigsPl checklist and the Water Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. 
High-quality draft genome: One or multiple fragments, totaling at least 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totaling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "atmospheric_data": { "description": "Measurement of atmospheric data; can include multiple data", "items": { "type": "string" }, "type": "array" }, "bac_prod": { "description": "Bacterial production in the water column measured by isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bac_resp": { "description": "Measurement of bacterial respiration in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. microbial, total. 
Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_nitro": { "description": "Concentration of dissolved inorganic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "down_par": { "description": "Visible waveband radiance and irradiance measurements in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fluor": { "description": "Raw or converted fluorescence of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_intensity": { "description": "Measurement of light intensity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "part_org_nitro": { "description": "Concentration of particulate organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "photon_flux": { "description": "Measurement of photon flux", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "primary_prod": { "description": "Measurement of primary production, generally measured as isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group.
For eukaryotes: sexual/asexual", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_react_phosp": { "description": "Concentration of soluble reactive phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_part_matter": { "description": "Concentration of suspended particulate matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_diss_nitro": { "description": "Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_inorg_nitro": { "description": "Total inorganic nitrogen content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_part_carb": { "description": "Total particulate carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsPlWater", "type": "object" }, "MigsViAgriculture": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the Agriculture Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. 
This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653).
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. 
A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field.
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. 
The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062).
Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON:03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). 
If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel.
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; information about treatment involving use of growth hormones; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer 
or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of ph of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimen", "items": { "type": "string" }, "type": "array" }, "photosynt_activ": { "description": "Measurement of photosythetic activity (i.e. leaf gas exchange / chlorophyll fluorescence emissions / reflectance / transpiration) Please also include the term method term detailing the method of activity measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "photosynt_activ_meth": { "description": "Reference or method used in measurement of photosynthetic activity", "items": { "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. 
Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_location": { "description": "The location the sequencing run was performed. 
Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of some soil.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_pH": { "description": "pH of some soil.", "type": "number" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. 
identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "water_pH": { "description": "The pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_shared": { "description": "Other users sharing access to the same water source. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "nucl_acid_amp", "assembly_name", "temp", "nucl_acid_ext", "isol_growth_condt", "pathogenicity", "propagation", "project_name", "host_spec_range", "adapters", "assembly_software", "env_local_scale", "specific_host", "env_medium", "samp_taxon_id", "geo_loc_name", 
"collection_date", "seq_meth", "lat_lon", "env_broad_scale", "chem_administration", "food_source", "samp_store_dur", "samp_store_loc", "host_age", "host_common_name", "host_genotype", "host_height", "host_length", "host_life_stage", "host_phenotype", "host_taxid", "host_tot_mass", "pcr_primers", "target_gene", "target_subfragment", "mid", "pcr_cond", "chimera_check", "soil_type", "soil_type_meth", "store_cond", "microbial_biomass", "micro_biomass_meth", "sieving", "pool_dna_extracts" ], "title": "MigsViAgriculture", "type": "object" }, "MigsViAir": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the Air Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "barometric_press": { "description": "Force per unit area exerted against a surface by the weight of air above that surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_monoxide": { "description": "Carbon monoxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "oxygen": { "description": "Oxygen (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pollutants": { "description": "Pollutant types and, amount or concentrations measured at the time of sampling; can report multiple pollutants by entering numeric values preceded by name of pollutant", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field is specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to samp_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored, written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "volatile_org_comp": { "description": "Concentration of carbon-based chemicals that easily evaporate at room temperature; can report multiple volatile organic compounds by entering numeric values preceded by name of compound", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "Speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "alt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsViAir", "type": "object" }, "MigsViBuiltEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the BuiltEnvironment Extension", "properties": { "abs_air_humidity": { "description": "Actual mass of water vapor - mH2O - present in the air-water vapor mixture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "address": { "description": "The street name and building number where the sampling occurred", "type": "string" }, "adj_room": { "description": "List of rooms (room number, room name) immediately adjacent to the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "aero_struc": { "$ref": "#/$defs/AEROSTRUCENUM", "description": "Aerospace structures typically consist of thin plates with stiffeners for the external surfaces, bulkheads and frames to support the shape and fasteners such as welds, rivets, screws and bolts to hold the components together" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amount_light": { "description": "The unit of illuminance and luminous emittance, measuring luminous flux per unit area", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "arch_struc": { "$ref": "#/$defs/ARCHSTRUCENUM", "description": "An architectural structure is a human-made, free-standing, immobile outdoor construction" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_dew_point": { "description": "The average of dew point measures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "avg_temp": { "description": "The average of temperatures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bathroom_count": { "description": "The number of bathrooms in the building", "type": "integer" }, "bedroom_count": { "description": "The number of bedrooms in the building", "type": "integer" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "build_docs": { "$ref": "#/$defs/BUILDDOCSENUM", "description": "The building design, construction and operation documents" }, "build_occup_type": { "description": "The primary function for which a building or discrete part of a building is intended to be used", "items": { "$ref": "#/$defs/BUILDOCCUPTYPEENUM" }, "type": "array" }, "building_setting": { "$ref": "#/$defs/BUILDINGSETTINGENUM", "description": "A location (geography) where a building is set" }, "built_struc_age": { "description": "The age of the built structure since construction", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "built_struc_set": { "$ref": "#/$defs/BUILTSTRUCSETENUM", "description": "The characterization of the location of the built structure as high or low human density" }, "built_struc_type": { "description": "A physical structure that is a body or assemblage of bodies in space to form a system capable of supporting loads", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_area": { "description": "The area of the ceiling space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the ceiling at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "ceil_finish_mat": { "$ref": "#/$defs/CEILFINISHMATENUM", "description": "The type of material used to finish a ceiling" }, "ceil_struc": { "$ref": "#/$defs/CEILSTRUCENUM", "description": "The construction format of the ceiling" }, "ceil_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a ceiling surface" }, "ceil_thermal_mass": { "description": "The ability of the ceiling to provide inertia against temperature fluctuations. Generally this means concrete that is exposed. A metal deck that supports a concrete slab will act thermally as long as it is exposed to room air flow", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_type": { "$ref": "#/$defs/CEILTYPEENUM", "description": "The type of ceiling according to the ceiling's appearance or construction" }, "ceil_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the ceiling" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cool_syst_id": { "description": "The cooling system identifier", "type": "integer" }, "date_last_rain": { "description": "The date of the last time it rained", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dew_point": { "description": "The temperature to which a given parcel of humid air must be cooled, at constant barometric pressure, for water vapor to condense into water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_comp_type": { "$ref": "#/$defs/DOORCOMPTYPEENUM", "description": "The composite type of the door" }, "door_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The phsical condition of the door" }, "door_direct": { "$ref": "#/$defs/DOORDIRECTENUM", "description": "The direction the door opens" }, "door_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the door in the room" }, "door_mat": { "$ref": "#/$defs/DOORMATENUM", "description": "The material the door is composed of" }, "door_move": { "$ref": "#/$defs/DOORMOVEENUM", "description": "The type of movement of the door" }, "door_size": { "description": "The size of the door", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_type": { "$ref": "#/$defs/DOORTYPEENUM", "description": "The type of door material" }, "door_type_metal": { "$ref": "#/$defs/DOORTYPEMETALENUM", "description": "The type of metal door" }, "door_type_wood": { "description": "The type of wood door", "type": "string" }, "door_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a door" }, "drawings": { "$ref": "#/$defs/DRAWINGSENUM", "description": "The building's architectural drawings; if design is chosen, indicate phase-conceptual, schematic, design development, and construction documents" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elevator": { "description": "The number of elevators within the built structure", "type": "integer" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen s local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "escalator": { "description": "The number of escalators within the built structure", "type": "integer" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "exp_duct": { "description": "The amount of exposed ductwork in the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "exp_pipe": { "description": "The number of exposed pipes in the room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "ext_door": { "description": "The number of exterior doors in the built structure", "type": "integer" }, "ext_wall_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The orientation of the exterior wall" }, "ext_window_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The compass direction the exterior window of the room is facing" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "filter_type": { "description": "A device which removes solid particulates or airborne molecular contaminants", "items": { "$ref": "#/$defs/FILTERTYPEENUM" }, "type": "array" }, "fireplace_type": { "$ref": "#/$defs/FIREPLACETYPEENUM", "description": "A firebox with chimney" }, "floor_age": { "description": "The time period since installment of the carpet or flooring", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_area": { "description": "The area of the floor space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the floor at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "floor_count": { "description": "The number of floors in the building, including basements and mechanical penthouse", "type": "integer" }, "floor_finish_mat": { "description": "The floor covering type; the finished surface that is walked on", "type": "string" }, "floor_struc": { "$ref": "#/$defs/FLOORSTRUCENUM", "description": "Refers to the structural elements and subfloor upon which the finish flooring is installed" }, "floor_thermal_mass": { "description": "The ability of the floor to provide inertia against temperature fluctuations", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_water_mold": { "$ref": "#/$defs/FLOORWATERMOLDENUM", "description": "Signs of the presence of mold or mildew in a room" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "freq_cook": { "description": "The number of times a meal is cooked per week", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "furniture": { "$ref": "#/$defs/FURNITUREENUM", "description": "The types of furniture present in the sampled room" }, "gender_restroom": { "$ref": "#/$defs/GENDERRESTROOMENUM", "description": "The gender type of the restroom" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hall_count": { "description": "The total count of hallways and corridors in the built structure", "type": "integer" }, "handidness": { "$ref": "#/$defs/HANDIDNESSENUM", "description": "The handedness of the individual sampled" }, "heat_cool_type": { "description": "Methods of conditioning or heating a room or building", "items": { "$ref": "#/$defs/HEATCOOLTYPEENUM" }, "type": "array" }, "heat_deliv_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The location of heat delivery within the room" }, "heat_sys_deliv_meth": { "$ref": "#/$defs/HEATSYSDELIVMETHENUM", "description": "The method by which the heat is delivered through the system" }, "heat_system_id": { "description": "The heating system identifier", "type": "integer" }, "height_carper_fiber": { "description": "The average carpet fiber height in the indoor environment", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "indoor_space": { "$ref": "#/$defs/INDOORSPACEENUM", "description": "A distinguishable space within a structure, the purpose for which discrete areas of a building is used" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "inside_lux": { "description": "The recorded value at sampling time (power density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "int_wall_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the wall at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "last_clean": { "description": "The last time the floor was cleaned (swept, mopped, vacuumed)", "format": "date-time", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_type": { "description": "Application of light to achieve some practical or aesthetic effect. Lighting includes the use of both artificial light sources such as lamps and light fixtures, as well as natural illumination by capturing daylight. 
Can also include absence of light", "items": { "$ref": "#/$defs/LIGHTTYPEENUM" }, "type": "array" }, "max_occup": { "description": "The maximum amount of people allowed in the indoor environment", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mech_struc": { "$ref": "#/$defs/MECHSTRUCENUM", "description": "mechanical structure: a moving structure" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "number_pets": { "description": "The number of pets residing in the sampled space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_plants": { "description": "The number of plant(s) in the sampling space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_resident": { "description": "The number of individuals currently occupying in the sampling location", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "occup_density_samp": { "description": "Average number of occupants at time of sampling per square footage", "type": "number" }, "occup_document": { "$ref": "#/$defs/OCCUPDOCUMENTENUM", "description": "The type of documentation of occupancy" }, "occup_samp": { "description": "Number of occupants present at time of sample within the given space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field is specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "quad_pos": { "$ref": "#/$defs/QUADPOSENUM", "description": "The quadrant position of the sampling room within the building" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_air_humidity": { "description": "Partial vapor and air pressure, density of the vapor and air, or by the actual mass of the vapor and air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "rel_humidity_out": { "description": "The recorded outside relative humidity value at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_samp_loc": { "$ref": "#/$defs/RELSAMPLOCENUM", "description": "The sampling location within the train car" }, "room_air_exch_rate": { "description": "The rate at which outside air replaces indoor air in a given space", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_architec_elem": { "description": "The unique details and component parts that, together, form the architecture of a distinguishable space within a built structure", "type": "string" }, "room_condt": { "$ref": "#/$defs/ROOMCONDTENUM", "description": "The condition of the room at the time of sampling" }, "room_connected": { "$ref": "#/$defs/ROOMCONNECTEDENUM", "description": "List of rooms connected to the sampling room by a doorway" }, "room_count": { "description": "The total count of rooms in the built structure including all room types", "type": "integer" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "room_door_dist": { "description": "Distance between doors (meters) in the hallway between the sampling room and adjacent rooms", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_door_share": { "description": "List of room(s) (room number, room name) sharing a door with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_hallway": { "description": "List of room(s) (room number, room name) located in the same hallway as sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" 
}, "room_loc": { "$ref": "#/$defs/ROOMLOCENUM", "description": "The position of the room within the building" }, "room_moist_dam_hist": { "description": "The history of moisture damage or mold in the past 12 months. Number of events of moisture damage or mold observed", "type": "integer" }, "room_net_area": { "description": "The net floor area of sampling room. Net area excludes wall thicknesses", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_occup": { "description": "Count of room occupancy at time of sampling", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_samp_pos": { "$ref": "#/$defs/ROOMSAMPPOSENUM", "description": "The horizontal sampling position in the room relative to architectural elements" }, "room_type": { "description": "The main purpose or activity of the sampling room. A room is any distinguishable space within a structure", "type": "string" }, "room_vol": { "description": "Volume of sampling room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_wall_share": { "description": "List of room(s) (room number, room name) sharing a wall with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_window_count": { "description": "Number of windows in the room", "type": "integer" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_sort_meth": { "description": "Method by which samples are sorted; open face filter collecting total suspended particles, prefilter to remove particles larger than X micrometers in diameter, where common values of X would be 10 and 2.5 full size sorting in a cascade impactor", "items": { "type": "string" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_time_out": { "description": "The recent and long term history of outside sampling", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_weather": { "$ref": "#/$defs/SAMPWEATHERENUM", "description": "The weather on the sampling day" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_use": { "$ref": "#/$defs/SEASONUSEENUM", "description": "The seasons the space is occupied" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "shad_dev_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the shading device" }, "shading_device_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the shading device at the time of sampling" }, "shading_device_loc": { "$ref": "#/$defs/SHADINGDEVICELOCENUM", "description": "The location of the shading device in relation to the built structure" }, "shading_device_mat": { "description": "The material the shading device is composed of", "type": "string" }, "shading_device_type": { "$ref": "#/$defs/SHADINGDEVICETYPEENUM", "description": "The type of shading device" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "space_typ_state": { "$ref": "#/$defs/SPACETYPSTATEENUM", "description": "Customary or normal state of the space" }, "specific": { "$ref": "#/$defs/SPECIFICENUM", "description": "The building specifications. If design is chosen, indicate phase: conceptual, schematic, design development, construction documents" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "specific_humidity": { "description": "The mass of water vapour in a unit mass of moist air, usually expressed as grams of vapour per kilogram of air, or, in air conditioning, as grains per pound", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "substructure_type": { "description": "The substructure or under building is that largely hidden section of the building which is built off the foundations to the ground floor level", "items": { "$ref": "#/$defs/SUBSTRUCTURETYPEENUM" }, "type": "array" }, "surf_air_cont": { "description": "Contaminant identified on surface", "items": { "$ref": "#/$defs/SURFAIRCONTENUM" }, "type": "array" }, "surf_humidity": { "description": "Surfaces: water activity as a function of air and material moisture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "surf_moisture": { "description": "Water held on a surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "surf_moisture_ph": { "description": "ph measurement of surface", "type": "number" }, "surf_temp": { "description": "Temperature of the surface at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp_out": { "description": "The recorded temperature value at sampling time outside", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "train_line": { "$ref": "#/$defs/TRAINLINEENUM", "description": "The subway line name" }, "train_stat_loc": { "$ref": "#/$defs/TRAINSTATLOCENUM", "description": "The train station collection location" }, "train_stop_loc": { "$ref": "#/$defs/TRAINSTOPLOCENUM", "description": "The train stop collection location" }, "typ_occup_density": { "description": "Customary or normal density of occupants", "type": "number" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "vis_media": { "description": "The building visual media", "type": "string" }, "wall_area": { "description": "The total area of the sampled room's walls", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_const_type": { "$ref": "#/$defs/WALLCONSTTYPEENUM", "description": "The building class of the wall defined by the composition of the building elements and fire-resistance rating" }, "wall_finish_mat": { "$ref": "#/$defs/WALLFINISHMATENUM", "description": "The material utilized to finish the outer most layer of the wall" }, "wall_height": { "description": "The average height of the walls in the sampled room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the wall within the room" }, "wall_surf_treatment": { "$ref": "#/$defs/WALLSURFTREATMENTENUM", "description": "The surface treatment of interior wall" }, "wall_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a wall surface" }, "wall_thermal_mass": { "description": "The ability of the wall to provide inertia against temperature fluctuations. Generally this means concrete or concrete block that is either exposed or covered only with paint", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a wall" }, "water_feat_size": { "description": "The size of the water feature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_feat_type": { "$ref": "#/$defs/WATERFEATTYPEENUM", "description": "The type of water feature present within the building being sampled" }, "weekday": { "$ref": "#/$defs/WEEKDAYENUM", "description": "The day of the week when sampling occurred" }, "window_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the window at the time of sampling" }, "window_cover": { "$ref": "#/$defs/WINDOWCOVERENUM", "description": "The type of window covering" }, "window_horiz_pos": { "$ref": "#/$defs/WINDOWHORIZPOSENUM", "description": "The horizontal position of the window on the wall" }, "window_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the window within the room" }, "window_mat": { "$ref": "#/$defs/WINDOWMATENUM", "description": "The type of material used to finish a window" }, "window_open_freq": { "description": "The number of times windows are opened per week", "type": "integer" }, "window_size": { "description": "The window's length and width", "type": "string" }, "window_status": { "$ref": "#/$defs/WINDOWSTATUSENUM", "description": "Defines whether the windows were open or closed during environmental testing" }, "window_type": { "$ref": "#/$defs/WINDOWTYPEENUM", "description": "The type of windows" }, "window_vert_pos": { "$ref": "#/$defs/WINDOWVERTPOSENUM", "description": "The vertical position of the window on the wall" }, "window_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the window" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "surf_material", "rel_air_humidity", "abs_air_humidity", "air_temp", "build_occup_type", "carb_dioxide", "ventilation_type", "organism_count", "indoor_space", "indoor_surf", 
"filter_type", "heat_cool_type", "building_setting", "light_type", "space_typ_state", "typ_occup_density", "occup_samp", "occup_density_samp" ], "title": "MigsViBuiltEnvironment", "type": "object" }, "MigsViFoodAnimalAndAnimalFeed": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the FoodAnimalAndAnimalFeed Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "animal_am": { "description": "The name(s) (generic or brand) of the antimicrobial(s) given to the food animal within the last 30 days", "type": "string" }, "animal_am_dur": { "description": "The duration of time (days) that the antimicrobial was administered to the food animal", "type": "string" }, "animal_am_freq": { "description": "The frequency per day that the antimicrobial was administered to the food animal", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "animal_am_route": { "description": "The route by which the antimicrobial is administered into the body of the food animal", "type": "string" }, "animal_am_use": { "description": "The prescribed intended use of or the condition treated by the antimicrobial given to the food animal by any route of administration", "type": "string" }, "animal_body_cond": { "$ref": "#/$defs/ANIMALBODYCONDENUM", "description": "Body condition scoring is a production management tool used to evaluate overall health and nutritional needs of a food animal. Because there are different scoring systems, this field is restricted to three categories" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal.
Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes (|). Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock.
This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_sex": { "$ref": "#/$defs/ANIMALSEXENUM", "description": "The sex and reproductive status of the food animal" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e.
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, 
active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). 
Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g.
air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. 
This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. 
http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. 
This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). 
Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type.
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_source_age": { "description": "The age of the food source host organism. Depending on the type of host organism, age may be more appropriate to report in days, weeks, or years", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel.
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. 
The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. 
It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. 
This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. 
Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. 
Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. 
However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. 
This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "food_origin", "food_prod", "food_product_type", "IFSAC_category", "intended_consumer", "samp_purpose" ], "title": "MigsViFoodAnimalAndAnimalFeed", "type": "object" }, "MigsViFoodFarmEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the FoodFarmEnvironment Extension", "properties": { "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. 
An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "air_flow_impede": { "description": "Presence of objects in the area that would influence or impede air flow through the air filter", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. 
parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes |Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. 
This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_intrusion": { "description": "Identification of animals intruding on the sample or sample site including invertebrates (such as pests or pollinators) and vertebrates (such as wildlife or domesticated animals). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "date_extr_weath": { "description": "Date of unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "format": "date-time", "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extr_weather_event": { "description": "Unusual weather events that may have affected microbial populations. 
Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "$ref": "#/$defs/EXTRWEATHEREVENTENUM" }, "type": "array" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_san_freq": { "description": "The number of times farm equipment is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually", "type": "string" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fertilizer_date": { "description": "Date of administration of soil amendment or fertilizer. Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "format": "date-time", "type": "string" }, "food_clean_proc": { "$ref": "#/$defs/FOODCLEANPROCENUM", "description": "The process of cleaning food to separate other environmental materials from the food source. Multiple terms can be separated by pipes" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. 
Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. 
This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality; this date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_medium": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). The name of the medium used to grow the microorganism", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. 
This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. 
The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. 
soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_part_maturity": { "description": "A description of the stage of development of a plant or plant part based on maturity or ripeness. This field accepts terms listed under degree of plant maturity (http://purl.obolibrary.org/obo/FOODON_03530050)", "type": "string" }, "plant_reprod_crop": { "description": "Plant reproductive part used in the field during planting to start the crop", "items": { "$ref": "#/$defs/PLANTREPRODCROPENUM" }, "type": "array" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free. Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. 
For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 
5.5", "type": "number" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. 
Use 'synthetic metagenome for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of soil at time of sampling.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_pH": { "description": "The pH of soil at time of sampling.", "type": "number" }, "soil_porosity": { "description": "Porosity of soil or deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_class": { "$ref": "#/$defs/SOILTEXTURECLASSENUM", "description": "One of the 12 soil texture classes used to describe soil texture based on the relative proportion of different grain sizes of mineral particles [sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um)] in a soil" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. 
This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "water_frequency": { "description": "Number of water delivery events within a given period of time", "type": "string" }, "water_pH": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_adjac": { "description": "Description of the environmental features that are adjacent to the farm water source. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "water_source_shared": { "description": "Other users sharing access to the same water source. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "biotic_regm", "chem_administration", "food_product_type", "samp_type", "IFSAC_category" ], "title": "MigsViFoodFarmEnvironment", "type": "object" }, "MigsViFoodFoodProductionFacility": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the FoodFoodProductionFacility Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products). 
An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "area_samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. 
For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_occup": { "description": "Daily average occupancy of room. 
Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide_used": { "description": "Substance intended for preventing, neutralizing, destroying, repelling, or mitigating the effects of any pest or microorganism; that inhibits the growth, reproduction, and activity of organisms, including fungal cells; decreases the number of fungi or pests present; deters microbial growth and degradation of other ingredients in the formulation. Indicate the biocide used on the location where the sample was taken. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. 
In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. 
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_monitoring_zone": { "description": "An environmental monitoring zone is a formal designation as part of an environmental monitoring program, in which areas of a food production facility are categorized, commonly as zones 1-4, based on likelihood or risk of foodborne pathogen contamination. This field accepts terms listed under food production environmental monitoring zone (http://purl.obolibrary.org/obo/ENVO). Please add a term to indicate the environmental monitoring zone the sample was taken from", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "facility_type": { "description": "Establishment details about the type of facility where the sample was taken. 
This is independent of the specific product(s) within the facility", "items": { "$ref": "#/$defs/FACILITYTYPEENUM" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. 
This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. 
Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. 
It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). 
Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. 
This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "hygienic_area": { "description": "The subdivision of areas within a food production facility according to hygienic requirements. This field accepts terms listed under hygienic food production area (http://purl.obolibrary.org/obo/ENVO). Please add a term that most accurately indicates the hygienic area your sample was taken from according to the definitions provided", "type": "string" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material", "items": { "type": "string" }, "type": "array" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free. 
Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_loc_condition": { "$ref": "#/$defs/SAMPLOCCONDITIONENUM", "description": "The condition of the sample location at the time of sampling" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. 
Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. 
If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_surf_moisture": { "description": "Degree of water held on a sampled surface. If present, user can state the degree of water held on surface (intermittent moisture, submerged). If no surface moisture is present indicate not present", "items": { "$ref": "#/$defs/SAMPSURFMOISTUREENUM" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. 
Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. 
Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spec_intended_cons": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "ster_meth_samp_room": { "description": "The method used to sterilize the sampling room. This field accepts terms listed under electromagnetic radiation (http://purl.obolibrary.org/obo/ENVO_01001026). If the proper descriptor is not listed, please use text to describe the sampling room sterilization method. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "samp_source_mat_cat", "samp_type", "samp_stor_media", "samp_stor_device", "food_product_type", "IFSAC_category", "food_product_qual", "food_contact_surf" ], "title": "MigsViFoodFoodProductionFacility", "type": "object" }, "MigsViFoodHumanFoods": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the FoodHumanFoods 
Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, 
active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_chem_add": { "description": "Any chemicals that are added to the fermentation process to achieve the desired final product", "items": { "type": "string" }, "type": "array" }, "ferm_chem_add_perc": { "description": "The amount of chemical added to the fermentation process", "items": { "type": "number" }, "type": "array" }, "ferm_headspace_oxy": { "description": "The amount of headspace oxygen in a fermentation vessel", "type": "number" }, "ferm_medium": { "description": "The growth medium used for the fermented food fermentation process, which supplies the required nutrients. Usually this includes a carbon and nitrogen source, water, micronutrients and chemical additives", "type": "string" }, "ferm_pH": { "description": "The pH of the fermented food fermentation process", "type": "number" }, "ferm_rel_humidity": { "description": "The relative humidity of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_temp": { "description": "The temperature of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_time": { "description": "The time duration of the fermented food fermentation process", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "ferm_vessel": { "description": "The type of vessel used for containment of the fermentation", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. 
This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that are not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. 
This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). 
Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. 
This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and it is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. 
The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. 
specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "microb_start": { "description": "Any type of microorganisms used in food production. This field accepts terms listed under live organisms for food production (http://purl.obolibrary.org/obo/FOODON_0344453)", "type": "string" }, "microb_start_count": { "description": "Total cell count of starter culture per gram, volume or area of sample and the method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example : total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "microb_start_inoc": { "description": "The amount of starter culture used to inoculate a new batch", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "microb_start_prep": { "description": "Information about the protocol or method used to prepare the starter inoculum", "type": "string" }, "microb_start_source": { "description": "The source from which the microbial starter culture was sourced. If commercially supplied, list supplier", "type": "string" }, "microb_start_taxID": { "description": "Please include Genus species and strain ID, if known of microorganisms used in food production. 
For complex communities, pipes can be used to separate two or more microbes", "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": "string" }, "type": "array" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. 
Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. 
Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "food_product_type", "IFSAC_category" ], "title": "MigsViFoodHumanFoods", "type": "object" }, "MigsViHostAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the HostAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "blood_press_diast": { "description": "Resting diastolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "blood_press_syst": { "description": "Resting systolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, meditteranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. 
The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of 
replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsViHostAssociated", "type": "object" }, "MigsViHumanAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the HumanAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amniotic_fluid_color": { "description": "Specification of the color of the amniotic fluid sample", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "blood_blood_disord": { "description": "History of blood disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, hematopoietic system disease (https://disease-ontology.org/?id=DOID:74)", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diet_last_six_month": { "description": "Specification of major diet changes in the last six months, if yes the change should be specified", "type": "string" }, "drug_usage": { "description": "Any drug used by subject and the frequency of usage; can include multiple drugs used", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "foetal_health_stat": { "description": "Specification of foetal health status, should also include abortion", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gestation_state": { "description": "Specification of the gestation state", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, meditteranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_hiv_stat": { "description": "HIV status of subject, if yes HAART initiation status should also be indicated as [YES or NO]", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "kidney_disord": { "description": "History of kidney disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, kidney disease (https://disease-ontology.org/?id=DOID:557)", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "maternal_health_stat": { "description": "Specification of the maternal health status", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_throat_disord": { "description": "History of nose-throat disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850), upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", 
"items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pet_farm_animal": { "description": "Specification of presence of pets or farm animals in the environment of subject, if yes the animals should be specified; can include multiple animals present", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "pulmonary_disord": { "description": "History of pulmonary disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850)", "items": { "type": "string" }, "type": "array" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "smoker": { "description": "Specification of smoking status", "type": "boolean" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "study_complt_stat": { "description": "Specification of study completion status, if no the reason should be specified", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "travel_out_six_month": { "description": "Specification of the countries travelled in the last six months; can include multiple travels", "items": { "type": "string" }, "type": "array" }, "twin_sibling": { "description": "Specification of twin sibling presence", "type": "boolean" }, "urine_collect_meth": { "$ref": "#/$defs/URINECOLLECTMETHENUM", "description": "Specification of urine collection method" }, "urogenit_tract_disor": { "description": "History of urogenital tract disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "weight_loss_3_month": { "description": "Specification of weight loss in the last three months, if yes should be further specified to include amount of weight loss", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsViHumanAssociated", "type": "object" }, "MigsViHumanGut": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the HumanGut Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. 
https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gastrointest_disord": { "description": "History of gastrointestinal tract disorders; can include multiple disorders. History of blood disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, gastrointestinal system disease (https://disease-ontology.org/?id=DOID:77)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "liver_disord": { "description": "History of liver disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, liver disease (https://disease-ontology.org/?id=DOID:409)", "items": { "type": "string" }, "type": "array" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. 
Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "special_diet": { "description": "Specification of special diet; can include multiple special diets", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsViHumanGut", "type": "object" }, "MigsViHumanOral": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the HumanOral Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. 
For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_mouth_teeth_throat_disord": { "description": "History of nose/mouth/teeth/throat disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, nose disease (https://disease-ontology.org/?id=DOID:2825), mouth disease (https://disease-ontology.org/?id=DOID:403), tooth disease (https://disease-ontology.org/?id=DOID:1091), or upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. 
More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_last_toothbrush": { "description": "Specification of the time since last toothbrushing", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsViHumanOral", "type": "object" }, "MigsViHumanSkin": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the HumanSkin Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dermatology_disord": { "description": "History of dermatology disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, skin disease (https://disease-ontology.org/?id=DOID:37)", "items": { "type": "string" }, "type": "array" }, "dominant_hand": { "$ref": "#/$defs/DOMINANTHANDENUM", "description": "Dominant hand of the subject" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. 
in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. 
Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. 
Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_since_last_wash": { "description": "Specification of the time since last wash", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsViHumanSkin", "type": "object" }, "MigsViHumanVaginal": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the HumanVaginal Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "birth_control": { "description": "Specification of birth control medication used", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "douche": { "description": "Date of most recent douche", "format": "date-time", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. 
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. 
Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gynecologic_disord": { "description": "History of gynecological disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, female reproductive system disease (https://disease-ontology.org/?id=DOID:229)", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hrt": { "description": "Whether subject had hormone replacement therapy, and if yes start date", "format": "date-time", "type": "string" }, "hysterectomy": { "description": "Specification of whether hysterectomy was performed", "type": "boolean" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "menarche": { "description": "Date of most recent menstruation", "format": "date-time", "type": "string" }, "menopause": { "description": "Date of onset of menopause", "format": "date-time", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the 
material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pregnancy": { "description": "Due date of pregnancy", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degrees Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sexual_act": { "description": "Current sexual partner and frequency of sex", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "urogenit_disord": { "description": "History of urogenital disorders, can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, reproductive system disease (https://disease-ontology.org/?id=DOID:15) or urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsViHumanVaginal", "type": "object" }, "MigsViHydrocarbonResourcesCores": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the HydrocarbonResourcesCores Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxgen-corrosion and microbial activity (e.g. Mic)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g.
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. (Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "owc_tvdss": { "description": "Depth of the original oil water contact (OWC) zone (average) (m TVDSS)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "permeability": { "description": "Measure of the ability of a hydrocarbon resource to allow fluids to pass through it. (Additional information: https://en.wikipedia.org/wiki/Permeability_(earth_sciences))", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field is specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. 
While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_md": { "description": "In non deviated well, measured depth is equal to the true vertical depth, TVD (TVD=TVDSS plus the reference or datum it refers to). In deviated wells, the MD is the length of trajectory of the borehole measured from the same reference or datum. Common datums used are ground level (GL), drilling rig floor (DF), rotary table (RT), kelly bushing (KB) and mean sea level (MSL). 
If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\".
If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_tvdss": { "description": "Depth of the sample i.e. the vertical distance between the sea level and the sampled position in the subsurface. Depth can be reported as an interval for subsurface samples e.g. 1325.75-1362.25 m", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g.
BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. 
The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "sr_dep_env": { "$ref": "#/$defs/SRDEPENVENUM", "description": "Source rock depositional environment (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of source rock (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_kerog_type": { "$ref": "#/$defs/SRKEROGTYPEENUM", "description": "Origin of kerogen. Type I: Algal (aquatic), Type II: planktonic and soft plant material (aquatic or terrestrial), Type III: terrestrial woody/ fibrous plant material (terrestrial), Type IV: oxidized recycled woody debris (terrestrial) (additional information: https://en.wikipedia.org/wiki/Kerogen). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_lithology": { "$ref": "#/$defs/SRLITHOLOGYENUM", "description": "Lithology of source rock (https://en.wikipedia.org/wiki/Source_rock). 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. 
(Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "hcr", "hc_produced", "basin", "hcr_temp", "sulfate_fw", "vfa_fw", "samp_type", "api" ], "title": "MigsViHydrocarbonResourcesCores", "type": "object" }, "MigsViHydrocarbonResourcesFluidsSwabs": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the HydrocarbonResourcesFluidsSwabs Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "add_recov_method": { "description": "Additional (i.e. Secondary, tertiary, etc.) recovery methods deployed for increase of hydrocarbon recovery from resource and start date for each one of them. If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide": { "description": "List of biocides (commercial name of product and supplier) and date of administration", "type": "string" }, "biocide_admin_method": { "description": "Method of biocide administration (dose, frequency, duration, time elapsed between last biociding and sampling) (e.g. 150 mg/l; weekly; 4 hr; 3 days)", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_treat_method": { "description": "Method of chemical administration(dose, frequency, duration, time elapsed between administration and sampling) (e.g. 50 mg/l; twice a week; 1 hr; 0 days)", "type": "string" }, "chem_treatment": { "description": "List of chemical compounds administered upstream the sampling location where sampling occurred (e.g. Glycols, H2S scavenger, corrosion and scale inhibitors, demulsifiers, and other production chemicals etc.). The commercial name of the product and name of the supplier should be provided. The date of administration should also be included", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. MIC)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from.
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing.
Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. 
The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "iw_bt_date_well": { "description": "Injection water breakthrough date per well following a secondary and/or tertiary recovery", "format": "date-time", "type": "string" }, "iwf": { "description": "Proportion of the produced fluids derived from injected water at the time of sampling. (e.g. 87%)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. 
(Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_rate": { "description": "Oil and/or gas production rates per well (e.g. 524 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_start_date": { "description": "Date of field's first production", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results.
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_point": { "$ref": "#/$defs/SAMPCOLLECTPOINTENUM", "description": "Sampling point on the asset where sample was collected (e.g. Wellhead, storage tank, separator, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_loc_corr_rate": { "description": "Metal corrosion rate is the speed of metal deterioration due to environmental conditions. As environmental conditions change corrosion rates change accordingly.
Therefore, long term corrosion rates are generally more informative than short term rates and for that reason they are preferred during reporting. In the case of suspected MIC, corrosion rate measurements at the time of sampling might provide insights into the involvement of certain microbial community members in MIC as well as potential microbial interplays", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_preserv": { "description": "Preservative added to the sample (e.g. Rnalater, alcohol, formaldehyde, etc.). Where appropriate include volume added (e.g. Rnalater; 2 ml)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material.
This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 
1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "water_cut": { "description": "Current amount of water (%) in a produced fluid stream; or the average of the combined streams", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_prod_rate": { "description": "Water production rates per well (e.g. 987 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. 
(Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "hcr", "hc_produced", "basin", "water_cut", "iwf", "add_recov_method", "samp_type", "samp_collect_point", "sulfate", "sulfide", "nitrate", "api" ], "title": "MigsViHydrocarbonResourcesFluidsSwabs", "type": "object" }, "MigsViMicrobialMatBiofilm": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the MicrobialMatBiofilm Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to samp_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsViMicrobialMatBiofilm", "type": "object" }, "MigsViMiscellaneousNaturalOrArtificialEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the MiscellaneousNaturalOrArtificialEnvironment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. 
Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsViMiscellaneousNaturalOrArtificialEnvironment", "type": "object" }, "MigsViPlantAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the PlantAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp_regm": { "description": "Information about treatment involving an exposure to varying temperatures; should include the temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include different temperature regimens", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "antibiotic_regm": { "description": "Information about treatment involving antibiotic administration; should include the name of antibiotic, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple antibiotic regimens", "items": { "type": "string" }, "type": "array" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. 
Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_mutagen": { "description": "Treatment involving use of mutagens; should include the name of mutagen, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mutagen regimens", "items": { "type": "string" }, "type": "array" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. 
checkm, anvi'o, busco", "type": "string" }, "cult_root_med": { "description": "Name or reference for the hydroponic or in vitro culture rooting medium; can be the name of a commonly used medium or reference to a specific medium, e.g. Murashige and Skoog medium. If the medium has not been formally published, use the rooting medium descriptors", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428.
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner.
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_regm": { "description": "Information about treatment involving the use of fertilizers; should include the name of fertilizer, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fertilizer regimens", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. 
specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravity": { "description": "Information about treatment involving use of gravity factor to study various types of responses in presence, absence or modified levels of gravity; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple treatments", "items": { "type": "string" }, "type": "array" }, "growth_facil": { "description": "Type of facility where the sampled plant was grown; controlled vocabulary: growth chamber, open top chamber, glasshouse, experimental garden, field. 
Alternatively use Crop Ontology (CO) terms, see http://www.cropontology.org/ontology/CO_715/Crop%20Research", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_hormone_regm": { "description": "Information about treatment involving use of growth hormones; should include the name of growth hormone, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple growth hormone regimens", "items": { "type": "string" }, "type": "array" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; information about treatment involving use of growth hormones; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_wet_mass": { "description": "Measurement of wet mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity_regm": { "description": "Information about treatment involving an exposure to varying degree of humidity; information about treatment involving use of growth hormones; should include amount of humidity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_regm": { "description": "Information about treatment(s) involving exposure to light, including both light intensity and quality", "type": "string" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "mineral_nutr_regm": { "description": "Information about treatment involving the use of mineral supplements; should include the name of mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of 
plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of ph of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimen", "items": { "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. 
Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_sex": { "$ref": "#/$defs/PLANTSEXENUM", "description": "Sex of the reproductive parts on the whole plant, e.g. pistillate, staminate, monoecious, hermaphrodite" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group.
For eukaryotes: sexual/asexual", "type": "string" }, "radiation_regm": { "description": "Information about treatment involving exposure of plant or a plant part to a particular radiation regimen; should include the radiation type, amount or intensity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple radiation regimens", "items": { "type": "string" }, "type": "array" }, "rainfall_regm": { "description": "Information about treatment involving an exposure to a given amount of rainfall, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. 
KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "root_med_regl": { "description": "Growth regulators in the culture rooting medium such as cytokinins, auxins, gibberellins, abscisic acid; e.g. 0.5 mg/L NAA", "type": "string" }, "root_med_solid": { "description": "Specification of the solidifying agent in the culture rooting medium; e.g. agar", "type": "string" }, "root_med_suppl": { "description": "Organic supplements of the culture rooting medium, such as vitamins, amino acids, organic acids, antibiotics, activated charcoal; e.g. nicotinic acid (0.5 mg/L)", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples.
It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tiss_cult_growth_med": { "description": "Description of plant tissue culture growth media used", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsViPlantAssociated", "type": "object" }, "MigsViSediment": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the Sediment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "particle_class": { "description": "Particles are classified, based on their size, into six general categories: clay, silt, sand, gravel, cobbles, and boulders; should include amount of particle preceded by the name of the particle type; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from the environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored, written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degrees Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sediment_type": { "$ref": "#/$defs/SEDIMENTTYPEENUM", "description": "Information about the sediment type based on major constituents" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsViSediment", "type": "object" }, "MigsViSoil": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the Soil Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported, in uppercase letters", "type": "string" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "al_sat": { "description": "Aluminum saturation (esp. for tropical soils)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "al_sat_meth": { "description": "Reference or method used in determining Al saturation", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals": { "description": "Heavy metals present in the sequenced sample and their concentrations. For multiple heavy metals and concentrations, add multiple copies of this field", "items": { "type": "string" }, "type": "array" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "link_addit_analys": { "description": "Link to additional analysis results performed on the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", 
"type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. 
For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from the environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "water_cont_soil_meth": { "description": "Reference or method used in determining the water content of soil", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "elev", "env_broad_scale" ], "title": "MigsViSoil", "type": "object" }, "MigsViSymbiontAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the SymbiontAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "association_duration": { "description": "Time spent in host of the symbiotic organism at the time of sampling; relevant scale depends on symbiotic organism and study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_cellular_loc": { "$ref": "#/$defs/HOSTCELLULARLOCENUM", "description": "The localization of the symbiotic host organism within the host from which it was sampled: e.g. intracellular if the symbiotic host organism is localized within the cells or extracellular if the symbiotic host organism is localized outside of cells" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_dependence": { "$ref": "#/$defs/HOSTDEPENDENCEENUM", "description": "Type of host dependence for the symbiotic host organism to its host" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_infra_spec_name": { "description": "Taxonomic information about the host below subspecies level", "type": "string" }, "host_infra_spec_rank": { "description": "Taxonomic rank information about the host below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_number": { "description": "Number of symbiotic host individuals pooled at the time of collection", "type": "string" }, "host_of_host_coinf": { "description": "The taxonomic name of any coinfecting organism observed in a symbiotic relationship with the host of the sampled host organism. e.g. where a sample collected from a host trematode species (A) which was collected from a host_of_host fish (B) that was also infected with a nematode (C), the value here would be (C) the nematode {species name} or {common name}. 
Multiple co-infecting species may be added in a comma-separated list. For listing symbiotic organisms associated with the host (A) use the term Observed host symbiont", "type": "string" }, "host_of_host_disease": { "description": "List of diseases with which the host of the symbiotic host organism has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_loc": { "description": "For a symbiotic host organism the local anatomical environment within its host may have causal influences. Report the anatomical entity(s) which are in the direct environment of the symbiotic host organism being sampled and which you believe have significant causal influences on your sample or specimen. For example, if the symbiotic host organism being sampled is an intestinal worm, its local environmental context will be the term for intestine from UBERON (http://uberon.github.io/)", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_med": { "description": "Report the environmental material(s) immediately surrounding the symbiotic host organism at the time of sampling. This usually will be a tissue or substance type from the host, but may be another material if the symbiont is external to the host. We recommend using classes from the UBERON ontology, but subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483) may also be used. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. intestines, heart)", "type": "string" }, 
"host_of_host_fam_rel": { "description": "Familial relationship of the host of the symbiotic host organisms to other hosts of symbiotic host organism in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_of_host_geno": { "description": "Observed genotype of the host of the symbiotic host organism", "type": "string" }, "host_of_host_gravid": { "description": "Whether or not the host of the symbiotic host organism is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_of_host_infname": { "description": "Taxonomic name information of the host of the symbiotic host organism below subspecies level", "type": "string" }, "host_of_host_infrank": { "description": "Taxonomic rank information about the host of the symbiotic host organism below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_of_host_name": { "description": "Common name of the host of the symbiotic host organism", "type": "string" }, "host_of_host_pheno": { "description": "Phenotype of the host of the symbiotic host organism. For phenotypic quality ontology (PATO) terms, see http://purl.bioontology.org/ontology/pato", "type": "string" }, "host_of_host_sub_id": { "description": "A unique identifier by which each host of the symbiotic host organism subject can be referred to, de-identified, e.g. 
#H14", "type": "string" }, "host_of_host_taxid": { "description": "NCBI taxon id of the host of the symbiotic host organism", "type": "string" }, "host_of_host_totmass": { "description": "Total mass of the host of the symbiotic host organism at collection, the unit depends on the host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_specificity": { "$ref": "#/$defs/HOSTSPECIFICITYENUM", "description": "Level of specificity of symbiont-host interaction: e.g. generalist (symbiont able to establish associations with distantly related hosts) or species-specific" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 
9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "mode_transmission": { "$ref": "#/$defs/MODETRANSMISSIONENUM", "description": "The process through which the symbiotic host organism entered the host from which it was sampled" }, "neg_cont_type": { "$ref": 
"#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "route_transmission": { "$ref": "#/$defs/ROUTETRANSMISSIONENUM", "description": "Description of path taken by the symbiotic host organism being sampled in order to establish a symbiotic relationship with the host (with which it was observed at the time of sampling) via a mode of transmission (specified in mode_transmission)" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_sol": { "description": "Solution within which sample was stored, if any", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sym_life_cycle_type": { "$ref": "#/$defs/SYMLIFECYCLETYPEENUM", "description": "Type of life cycle of the symbiotic host species (the thing being sampled). Simple life cycles occur within a single host, complex ones within multiple different hosts over the course of their normal life cycle" }, "symbiont_host_role": { "$ref": "#/$defs/SYMBIONTHOSTROLEENUM", "description": "Role of the host in the life cycle of the symbiotic organism" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type_of_symbiosis": { "$ref": "#/$defs/TYPEOFSYMBIOSISENUM", "description": "Type of biological interaction established between the symbiotic host organism being sampled and its respective host" }, "urobiom_sex": { "$ref": "#/$defs/UROBIOMSEXENUM", "description": "Physical sex of the host" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "host_dependence", "sym_life_cycle_type", "host_life_stage" ], "title": "MigsViSymbiontAssociated", "type": "object" }, "MigsViWastewaterSludge": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the WastewaterSludge Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biochem_oxygen_dem": { "description": "Amount of dissolved oxygen needed by aerobic biological organisms in a body of water to break down organic material present in a given water sample at certain temperature over a specific time period", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_oxygen_dem": { "description": "A measure of the capacity of water to consume oxygen during the decomposition of organic matter and the oxidation of inorganic chemicals such as ammonia and nitrite", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "efficiency_percent": { "description": "Percentage of volatile solids removed from the anaerobic digestor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "emulsions": { "description": "Amount or concentration of substances such as paints, adhesives, mayonnaise, hair colorants, emulsified oils, etc.; can include multiple emulsion types", "items": { "type": "string" }, "type": "array" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gaseous_substances": { "description": "Amount or concentration of substances such as hydrogen sulfide, carbon dioxide, methane, etc.; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "indust_eff_percent": { "description": "Percentage of industrial effluents received by wastewater treatment plant", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "inorg_particles": { "description": "Concentration of particles such as sand, grit, metal particles, ceramics, etc.; can include multiple particles", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": 
"^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_particles": { "description": "Concentration of particles such as faeces, hairs, food, vomit, paper fibers, plant material, humus, etc", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pre_treatment": { "description": "The process of pre-treatment removes materials that can be easily collected from the raw wastewater", "type": "string" }, "primary_treatment": { "description": "The process to produce both a generally homogeneous liquid capable of being treated biologically and a sludge that can be separately treated or processed", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field are specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. 
For eukaryotes: sexual/asexual", "type": "string" }, "reactor_type": { "description": "Anaerobic digesters can be designed and engineered to operate using a number of different process configurations, as batch or continuous, mesophilic, high solid or low solid, and single stage or multistage", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "secondary_treatment": { "description": "The process for substantially degrading the biological content of the sewage", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sewage_type": { "description": "Type of wastewater treatment plant as municipal or industrial", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sludge_retent_time": { "description": "The time activated sludge remains in reactor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_inorg_mat": { "description": "Concentration of substances such as ammonia, road-salt, sea-salt, cyanide, hydrogen sulfide, thiocyanates, thiosulfates, etc", "items": { "type": "string" }, "type": "array" }, "soluble_org_mat": { "description": "Concentration of substances such as urea, fruit sugars, soluble proteins, drugs, pharmaceuticals, etc", "items": { "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tertiary_treatment": { "description": "The process providing a final treatment stage to raise the effluent quality before it is discharged to the receiving environment", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "wastewater_type": { "description": "The origin of wastewater such as human waste, rainfall, storm drains, etc", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsViWastewaterSludge", "type": "object" }, "MigsViWater": { "additionalProperties": false, "description": "MIxS data that complies with the MigsVi checklist and the Water Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "atmospheric_data": { "description": "Measurement of atmospheric data; can include multiple data", "items": { "type": "string" }, "type": "array" }, "bac_prod": { "description": "Bacterial production in the water column measured by isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bac_resp": { "description": "Measurement of bacterial respiration in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. 
Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instant (single point in time) or interval. In case no exact time is available, the date/time can be right truncated, i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_nitro": { "description": "Concentration of dissolved inorganic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "down_par": { "description": "Visible waveband radiance and irradiance measurements in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "encoded_traits": { "description": "Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. 
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. 
Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fluor": { "description": "Raw or converted fluorescence of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_intensity": { "description": "Measurement of light intensity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "num_replicons": { "description": "Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote", "type": "integer" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "part_org_nitro": { "description": "Concentration of particulate organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "photon_flux": { "description": "Measurement of photon flux", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "primary_prod": { "description": "Measurement of primary production, generally measured as isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "propagation": { "description": "The type of reproduction from the parent stock. Values for this field is specific to different taxa. For phage or virus: lytic/lysogenic/temperate/obligately lytic. For plasmids: incompatibility group. For eukaryotes: sexual/asexual", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. 
This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. Maybe be a single taxon or mixed taxa sample. Use 'synthetic metagenome for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_react_phosp": { "description": "Concentration of soluble reactive phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_part_matter": { "description": "Concentration of suspended particulate matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_diss_nitro": { "description": "Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_inorg_nitro": { "description": "Total inorganic nitrogen content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_part_carb": { "description": "Total particulate carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "propagation", "project_name", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MigsViWater", "type": "object" }, "MimagAgriculture": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the Agriculture Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. 
This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; information about treatment involving use of growth hormones; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer 
or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area 
of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of ph of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimen", "items": { "type": "string" }, "type": "array" }, "photosynt_activ": { "description": "Measurement of photosythetic activity (i.e. leaf gas exchange / chlorophyll fluorescence emissions / reflectance / transpiration) Please also include the term method term detailing the method of activity measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "photosynt_activ_meth": { "description": "Reference or method used in measurement of photosynthetic activity", "items": { "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. 
under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. 
Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_location": { "description": "The location the sequencing run was performed. 
Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. 
This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of some soil.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_pH": { "description": "pH of some soil.", "type": "number" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. 
This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "water_pH": { "description": "The pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_shared": { "description": "Other users sharing access to the same water source. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", 
"lib_screen", "nucl_acid_amp", "mid", "assembly_name", "temp", "compl_score", "nucl_acid_ext", "samp_size", "bin_param", "bin_software", "lib_reads_seqd", "samp_collect_device", "assembly_qual", "project_name", "lib_vector", "adapters", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "samp_mat_process", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "chem_administration", "food_source", "specific_host", "pathogenicity", "samp_store_dur", "samp_store_loc", "host_age", "host_common_name", "host_genotype", "host_height", "host_length", "host_life_stage", "host_phenotype", "host_taxid", "host_tot_mass", "host_spec_range", "pcr_primers", "target_gene", "target_subfragment", "pcr_cond", "chimera_check", "soil_type", "soil_type_meth", "store_cond", "microbial_biomass", "micro_biomass_meth", "sieving", "pool_dna_extracts" ], "title": "MimagAgriculture", "type": "object" }, "MimagAir": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the Air Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "barometric_press": { "description": "Force per unit area exerted against a surface by the weight of air above that surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_monoxide": { "description": "Carbon monoxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. 
Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "oxygen": { "description": "Oxygen (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pollutants": { "description": "Pollutant types and, amount or concentrations measured at the time of sampling; can report multiple pollutants by entering numeric values preceded by name of pollutant", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to 
website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. 
It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "volatile_org_comp": { "description": "Concentration of carbon-based chemicals that easily evaporate at room temperature; can report multiple volatile organic compounds by entering numeric values preceded by name of compound", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "alt", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimagAir", "type": "object" }, "MimagBuiltEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the BuiltEnvironment Extension", "properties": { "abs_air_humidity": { "description": "Actual mass of water vapor - mh20 - present in the air water vapor mixture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "address": { "description": "The street name and building number where the sampling occurred", "type": "string" }, "adj_room": { "description": "List of rooms (room number, room name) immediately adjacent to the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "aero_struc": { "$ref": "#/$defs/AEROSTRUCENUM", "description": "Aerospace structures typically consist of thin plates with stiffeners for the external surfaces, bulkheads and frames to support the shape and fasteners such as welds, rivets, screws and bolts to hold the components together" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amount_light": { "description": "The unit of illuminance and luminous emittance, measuring luminous flux per unit area", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "arch_struc": { "$ref": "#/$defs/ARCHSTRUCENUM", "description": "An architectural structure is a human-made, free-standing, immobile outdoor construction" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_dew_point": { "description": "The average of dew point measures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "avg_temp": { "description": "The average of temperatures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bathroom_count": { "description": "The number of bathrooms in the building", "type": "integer" }, "bedroom_count": { "description": "The number of bedrooms in the building", "type": "integer" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "build_docs": { "$ref": "#/$defs/BUILDDOCSENUM", "description": "The building design, construction and operation documents" }, "build_occup_type": { "description": "The primary function for which a building or discrete part of a building is intended to be used", "items": { "$ref": "#/$defs/BUILDOCCUPTYPEENUM" }, "type": "array" }, "building_setting": { "$ref": "#/$defs/BUILDINGSETTINGENUM", "description": "A location (geography) where a building is set" }, "built_struc_age": { "description": "The age of the built structure since construction", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "built_struc_set": { "$ref": "#/$defs/BUILTSTRUCSETENUM", "description": "The characterization of the location of the built structure as high or low human density" }, "built_struc_type": { "description": "A physical structure that is a body or assemblage of bodies in space to form a system capable of supporting loads", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_area": { "description": "The area of the ceiling space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the ceiling at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "ceil_finish_mat": { "$ref": "#/$defs/CEILFINISHMATENUM", "description": "The type of material used to finish a ceiling" }, "ceil_struc": { "$ref": "#/$defs/CEILSTRUCENUM", "description": "The construction format of the ceiling" }, "ceil_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a ceiling surface" }, "ceil_thermal_mass": { "description": "The ability of the ceiling to provide inertia against temperature fluctuations. Generally this means concrete that is exposed. A metal deck that supports a concrete slab will act thermally as long as it is exposed to room air flow", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_type": { "$ref": "#/$defs/CEILTYPEENUM", "description": "The type of ceiling according to the ceiling's appearance or construction" }, "ceil_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the ceiling" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. 
kmer and coverage, or reference database and kmer", "type": "string" }, "cool_syst_id": { "description": "The cooling system identifier", "type": "integer" }, "date_last_rain": { "description": "The date of the last time it rained", "format": "date-time", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dew_point": { "description": "The temperature to which a given parcel of humid air must be cooled, at constant barometric pressure, for water vapor to condense into water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_comp_type": { "$ref": "#/$defs/DOORCOMPTYPEENUM", "description": "The composite type of the door" }, "door_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the door" }, "door_direct": { "$ref": "#/$defs/DOORDIRECTENUM", "description": "The direction the door opens" }, "door_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the door in the room" }, "door_mat": { "$ref": "#/$defs/DOORMATENUM", "description": "The material the door is composed of" }, "door_move": { "$ref": "#/$defs/DOORMOVEENUM", "description": "The type of movement of the door" }, "door_size": { "description": "The size of the door", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_type": { "$ref": "#/$defs/DOORTYPEENUM", "description": "The type of door material" }, "door_type_metal": { "$ref": "#/$defs/DOORTYPEMETALENUM", "description": "The type of metal door" }, "door_type_wood": { "description": "The type of wood door", "type": "string" }, "door_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a door" }, "drawings": { "$ref": "#/$defs/DRAWINGSENUM", "description": "The buildings architectural drawings; if design is chosen, indicate phase-conceptual, schematic, design development, and construction documents" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elevator": { "description": "The number of elevators within the built structure", "type": "integer" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO s biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "escalator": { "description": "The number of escalators within the built structure", "type": "integer" }, "exp_duct": { "description": "The amount of exposed ductwork in the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "exp_pipe": { "description": "The number of exposed pipes in the room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "ext_door": { "description": "The number of exterior doors in the built structure", "type": "integer" }, "ext_wall_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The orientation of the exterior wall" }, "ext_window_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The compass direction the exterior window of the room is facing" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "filter_type": { "description": "A device which removes solid particulates or airborne molecular contaminants", "items": { "$ref": "#/$defs/FILTERTYPEENUM" }, "type": "array" }, "fireplace_type": { "$ref": "#/$defs/FIREPLACETYPEENUM", "description": "A firebox with chimney" }, "floor_age": { "description": "The time period since installment of the carpet or flooring", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_area": { "description": "The area of the floor space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the floor at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "floor_count": { "description": "The number of floors in the building, including basements and mechanical penthouse", "type": "integer" }, "floor_finish_mat": { "description": "The floor covering type; the finished surface that is walked on", "type": "string" }, "floor_struc": { "$ref": "#/$defs/FLOORSTRUCENUM", "description": "Refers to the structural elements and subfloor upon which the finish flooring is installed" }, "floor_thermal_mass": { "description": "The ability of the floor to provide inertia against temperature fluctuations", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_water_mold": { "$ref": "#/$defs/FLOORWATERMOLDENUM", "description": "Signs of the presence of mold or mildew in a room" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "freq_cook": { "description": "The number of times a meal is cooked per week", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "furniture": { "$ref": "#/$defs/FURNITUREENUM", "description": "The types of furniture present in the sampled room" }, "gender_restroom": { "$ref": "#/$defs/GENDERRESTROOMENUM", "description": "The gender type of the restroom" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hall_count": { "description": "The total count of hallways and corridors in the built structure", "type": "integer" }, "handidness": { "$ref": "#/$defs/HANDIDNESSENUM", "description": "The handedness of the individual sampled" }, "heat_cool_type": { "description": "Methods of conditioning or heating a room or building", "items": { "$ref": "#/$defs/HEATCOOLTYPEENUM" }, "type": "array" }, "heat_deliv_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The location of heat delivery within the room" }, "heat_sys_deliv_meth": { "$ref": "#/$defs/HEATSYSDELIVMETHENUM", "description": "The method by which the heat is delivered through the system" }, "heat_system_id": { "description": "The heating system identifier", "type": "integer" }, "height_carper_fiber": { "description": "The average carpet fiber height in the indoor environment", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "indoor_space": { "$ref": "#/$defs/INDOORSPACEENUM", "description": "A distinguishable space within a structure, the purpose for which discrete areas of a building is used" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "inside_lux": { "description": "The recorded value at sampling time (power density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "int_wall_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the wall at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "last_clean": { "description": "The last time the floor was cleaned (swept, mopped, vacuumed)", "format": "date-time", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_type": { "description": "Application of light to achieve some practical or aesthetic effect. Lighting includes the use of both artificial light sources such as lamps and light fixtures, as well as natural illumination by capturing daylight. 
Can also include absence of light", "items": { "$ref": "#/$defs/LIGHTTYPEENUM" }, "type": "array" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "max_occup": { "description": "The maximum amount of people allowed in the indoor environment", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mech_struc": { "$ref": "#/$defs/MECHSTRUCENUM", "description": "mechanical structure: a moving structure" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, 
"number_pets": { "description": "The number of pets residing in the sampled space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_plants": { "description": "The number of plant(s) in the sampling space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_resident": { "description": "The number of individuals currently occupying in the sampling location", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "occup_density_samp": { "description": "Average number of occupants at time of sampling per square footage", "type": "number" }, "occup_document": { "$ref": "#/$defs/OCCUPDOCUMENTENUM", "description": "The type of documentation of occupancy" }, "occup_samp": { "description": "Number of occupants present at time of sample within the given space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "quad_pos": { "$ref": "#/$defs/QUADPOSENUM", "description": "The quadrant position of the sampling room within the building" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_air_humidity": { "description": "Partial vapor and air pressure, density of the vapor and air, or by the actual mass of the vapor and air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "rel_humidity_out": { "description": "The recorded outside relative humidity value at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_samp_loc": { "$ref": "#/$defs/RELSAMPLOCENUM", "description": "The sampling location within the train car" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "room_air_exch_rate": { "description": "The rate at which outside air replaces indoor air in a given space", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_architec_elem": { "description": "The unique details and component parts that, together, form the architecture of a distinguishable space within a built structure", "type": "string" }, "room_condt": { "$ref": "#/$defs/ROOMCONDTENUM", "description": "The condition of the room at the time of sampling" }, "room_connected": { "$ref": "#/$defs/ROOMCONNECTEDENUM", "description": "List of rooms connected to the sampling room by a doorway" }, "room_count": { "description": "The total count of rooms in the built structure including all room types", "type": "integer" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "room_door_dist": { "description": "Distance between doors (meters) in the hallway between the sampling room and adjacent rooms", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_door_share": { "description": "List of room(s) (room number, room name) sharing a door with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_hallway": { "description": "List of room(s) (room number, room name) located in the same hallway as sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_loc": { "$ref": "#/$defs/ROOMLOCENUM", "description": "The 
position of the room within the building" }, "room_moist_dam_hist": { "description": "The history of moisture damage or mold in the past 12 months. Number of events of moisture damage or mold observed", "type": "integer" }, "room_net_area": { "description": "The net floor area of sampling room. Net area excludes wall thicknesses", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_occup": { "description": "Count of room occupancy at time of sampling", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_samp_pos": { "$ref": "#/$defs/ROOMSAMPPOSENUM", "description": "The horizontal sampling position in the room relative to architectural elements" }, "room_type": { "description": "The main purpose or activity of the sampling room. A room is any distinguishable space within a structure", "type": "string" }, "room_vol": { "description": "Volume of sampling room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_wall_share": { "description": "List of room(s) (room number, room name) sharing a wall with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_window_count": { "description": "Number of windows in the room", "type": "integer" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_sort_meth": { "description": "Method by which samples are sorted; open face filter collecting total suspended particles, prefilter to remove particles larger than X micrometers in diameter, where common values of X would be 10 and 2.5 full size sorting in a cascade impactor", "items": { "type": "string" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_time_out": { "description": "The recent and long term history of outside sampling", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_weather": { "$ref": "#/$defs/SAMPWEATHERENUM", "description": "The weather on the sampling day" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_use": { "$ref": "#/$defs/SEASONUSEENUM", "description": "The seasons the space is occupied" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "shad_dev_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the shading device" }, "shading_device_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the shading device at the time of sampling" }, "shading_device_loc": { "$ref": "#/$defs/SHADINGDEVICELOCENUM", "description": "The location of the shading device in relation to the built structure" }, "shading_device_mat": { "description": "The material the shading device is composed of", "type": "string" }, "shading_device_type": { "$ref": "#/$defs/SHADINGDEVICETYPEENUM", "description": "The type of shading device" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "space_typ_state": { "$ref": "#/$defs/SPACETYPSTATEENUM", "description": "Customary or normal state of the space" }, "specific": { "$ref": "#/$defs/SPECIFICENUM", "description": "The building specifications. If design is chosen, indicate phase: conceptual, schematic, design development, construction documents" }, "specific_humidity": { "description": "The mass of water vapour in a unit mass of moist air, usually expressed as grams of vapour per kilogram of air, or, in air conditioning, as grains per pound", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "substructure_type": { "description": "The substructure or under building is that largely hidden section of the building which is built off the foundations to the ground floor level", "items": { "$ref": "#/$defs/SUBSTRUCTURETYPEENUM" }, "type": "array" }, "surf_air_cont": { "description": "Contaminant identified on surface", "items": { "$ref": "#/$defs/SURFAIRCONTENUM" }, "type": "array" }, "surf_humidity": { "description": "Surfaces: water activity as a function of air and material moisture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "surf_moisture": { "description": "Water held on a surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "surf_moisture_ph": { "description": "ph measurement of surface", "type": "number" }, "surf_temp": { "description": "Temperature of the surface at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp_out": { "description": "The recorded temperature value at sampling time outside", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "train_line": { "$ref": "#/$defs/TRAINLINEENUM", "description": "The subway line name" }, "train_stat_loc": { "$ref": "#/$defs/TRAINSTATLOCENUM", "description": "The train station collection location" }, "train_stop_loc": { "$ref": "#/$defs/TRAINSTOPLOCENUM", "description": "The train stop collection location" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "typ_occup_density": { "description": "Customary or normal density of occupants", "type": "number" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "vis_media": { "description": "The building visual media", "type": "string" }, "wall_area": { "description": "The total area of the sampled room's walls", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_const_type": { "$ref": "#/$defs/WALLCONSTTYPEENUM", "description": "The building class of the wall defined by the composition of the building elements and fire-resistance rating" }, "wall_finish_mat": { "$ref": "#/$defs/WALLFINISHMATENUM", "description": "The material utilized to finish the outer most layer of the wall" }, "wall_height": { "description": "The average height of the walls in the sampled room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the wall within the room" }, "wall_surf_treatment": { "$ref": "#/$defs/WALLSURFTREATMENTENUM", "description": "The surface treatment of interior wall" }, "wall_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a wall surface" }, "wall_thermal_mass": { "description": "The ability of the wall to provide inertia against temperature fluctuations. Generally this means concrete or concrete block that is either exposed or covered only with paint", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a wall" }, "water_feat_size": { "description": "The size of the water feature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_feat_type": { "$ref": "#/$defs/WATERFEATTYPEENUM", "description": "The type of water feature present within the building being sampled" }, "weekday": { "$ref": "#/$defs/WEEKDAYENUM", "description": "The day of the week when sampling occurred" }, "window_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the window at the time of sampling" }, "window_cover": { "$ref": "#/$defs/WINDOWCOVERENUM", "description": "The type of window covering" }, "window_horiz_pos": { "$ref": "#/$defs/WINDOWHORIZPOSENUM", "description": "The horizontal position of the window on the wall" }, "window_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the window within the room" }, "window_mat": { "$ref": "#/$defs/WINDOWMATENUM", "description": "The type of material used to finish a window" }, "window_open_freq": { "description": "The number of times windows are opened per week", "type": "integer" }, "window_size": { "description": "The window's length and width", "type": "string" }, "window_status": { "$ref": "#/$defs/WINDOWSTATUSENUM", "description": "Defines whether the windows were open or closed during environmental testing" }, "window_type": { "$ref": "#/$defs/WINDOWTYPEENUM", "description": "The type of windows" }, "window_vert_pos": { "$ref": "#/$defs/WINDOWVERTPOSENUM", "description": "The vertical position of the window on the wall" }, "window_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the window" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", 
"bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "surf_material", "rel_air_humidity", "abs_air_humidity", "air_temp", "build_occup_type", "carb_dioxide", "ventilation_type", "organism_count", "indoor_space", "indoor_surf", "filter_type", "heat_cool_type", "building_setting", "light_type", "space_typ_state", "typ_occup_density", "occup_samp", "occup_density_samp" ], "title": "MimagBuiltEnvironment", "type": "object" }, "MimagFoodAnimalAndAnimalFeed": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the FoodAnimalAndAnimalFeed Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. 
An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "animal_am": { "description": "The name(s) (generic or brand) of the antimicrobial(s) given to the food animal within the last 30 days", "type": "string" }, "animal_am_dur": { "description": "The duration of time (days) that the antimicrobial was administered to the food animal", "type": "string" }, "animal_am_freq": { "description": "The frequency per day that the antimicrobial was adminstered to the food animal", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "animal_am_route": { "description": "The route by which the antimicrobial is administered into the body of the food animal", "type": "string" }, "animal_am_use": { "description": "The prescribed intended use of or the condition treated by the antimicrobial given to the food animal by any route of administration", "type": "string" }, "animal_body_cond": { "$ref": "#/$defs/ANIMALBODYCONDENUM", "description": "Body condition scoring is a production management tool used to evaluate overall health and nutritional needs of a food animal. Because there are different scoring systems, this field is restricted to three categories" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes |Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock.
This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_sex": { "$ref": "#/$defs/ANIMALSEXENUM", "description": "The sex and reproductive status of the food animal" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval.
In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e.
kmer and coverage, or reference database and kmer", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483).
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. 
This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. 
http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. 
This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). 
Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON:03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality; this date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_source_age": { "description": "The age of the food source host organism. Depending on the type of host organism, age may be more appropriate to report in days, weeks, or years", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel.
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descrptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This fields accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. 
This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, 
volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. 
Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. 
Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "food_origin", "food_prod", "food_product_type", "IFSAC_category", "intended_consumer", "samp_purpose" ], "title": "MimagFoodAnimalAndAnimalFeed", "type": "object" }, "MimagFoodFarmEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the FoodFarmEnvironment Extension", "properties": { "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. 
First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "air_flow_impede": { "description": "Presence of objects in the area that would influence or impede air flow through the air filter", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. 
For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes |Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_intrusion": { "description": "Identification of animals intruding on the sample or sample site including invertebrates (such as pests or pollinators) and vertebrates (such as wildlife or domesticated animals). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. 
Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. 
Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "date_extr_weath": { "description": "Date of unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "format": "date-time", "type": "string" }, "type": "array" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. 
For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO s biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extr_weather_event": { "description": "Unusual weather events that may have affected microbial populations. 
Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "$ref": "#/$defs/EXTRWEATHEREVENTENUM" }, "type": "array" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_san_freq": { "description": "The number of times farm equipment is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually", "type": "string" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fertilizer_date": { "description": "Date of administration of soil amendment or fertilizer. Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "format": "date-time", "type": "string" }, "food_clean_proc": { "$ref": "#/$defs/FOODCLEANPROCENUM", "description": "The process of cleaning food to separate other environmental materials from the food source. Multiple terms can be separated by pipes" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. 
Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. 
This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. Unless used on infant formula, this date is not a reflection of safety; it is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_medium": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). The name of the medium used to grow the microorganism", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. 
The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": 
"Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. 
Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_part_maturity": { "description": "A description of the stage of development of a plant or plant part based on maturity or ripeness. This field accepts terms listed under degree of plant maturity (http://purl.obolibrary.org/obo/FOODON_03530050)", "type": "string" }, "plant_reprod_crop": { "description": "Plant reproductive part used in the field during planting to start the crop", "items": { "$ref": "#/$defs/PLANTREPRODCROPENUM" }, "type": "array" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free. 
Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. 
sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. 
This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. 
Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of soil at time of sampling.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_pH": { "description": "The pH of soil at time of sampling.", "type": "number" }, "soil_porosity": { "description": "Porosity of soil or deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_class": { "$ref": "#/$defs/SOILTEXTURECLASSENUM", "description": "One of the 12 soil texture classes used to describe soil texture based on the relative proportion of different grain sizes of mineral particles [sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um)] in a soil" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "water_frequency": { "description": "Number of water delivery events within a given period of time", "type": "string" }, "water_pH": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_adjac": { "description": "Description of the environmental features that are adjacent to the farm water source. 
This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "water_source_shared": { "description": "Other users sharing access to the same water source. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "biotic_regm", "chem_administration", "food_product_type", "samp_type", "IFSAC_category" ], "title": "MimagFoodFarmEnvironment", "type": "object" }, "MimagFoodFoodProductionFacility": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the FoodFoodProductionFacility Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term 
(http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products). An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "area_samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biocide_used": { "description": "Substance intended for preventing, neutralizing, destroying, repelling, or mitigating the effects of any pest or microorganism; that inhibits the growth, reproduction, and activity of organisms, including fungal cells; decreases the number of fungi or pests present; deters microbial growth and degradation of other ingredients in the formulation. Indicate the biocide used on the location where the sample was taken. 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. 
checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, e.g. kmer and coverage, or reference database and kmer", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). 
Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. 
air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_monitoring_zone": { "description": "An environmental monitoring zone is a formal designation as part of an environmental monitoring program, in which areas of a food production facility are categorized, commonly as zones 1-4, based on likelihood or risk of foodborne pathogen contamination. This field accepts terms listed under food production environmental monitoring zone (http://purl.obolibrary.org/obo/ENVO). Please add a term to indicate the environmental monitoring zone the sample was taken from", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "facility_type": { "description": "Establishment details about the type of facility where the sample was taken. This is independent of the specific product(s) within the facility", "items": { "$ref": "#/$defs/FACILITYTYPEENUM" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). 
Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. 
of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that are not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. 
This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. 
This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. 
This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. 
The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. 
Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hygienic_area": { "description": "The subdivision of areas within a food production facility according to hygienic requirements. This field accepts terms listed under hygienic food production area (http://purl.obolibrary.org/obo/ENVO). Please add a term that most accurately indicates the hygienic area your sample was taken from according to the definitions provided", "type": "string" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. 
This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": "string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected 
during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free. 
Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_loc_condition": { "$ref": "#/$defs/SAMPLOCCONDITIONENUM", "description": "The condition of the sample location at the time of sampling" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. 
Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. 
If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_surf_moisture": { "description": "Degree of water held on a sampled surface. If present, user can state the degree of water held on surface (intermittent moisture, submerged). If no surface moisture is present indicate not present", "items": { "$ref": "#/$defs/SAMPSURFMOISTUREENUM" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. 
Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. 
Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spec_intended_cons": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "ster_meth_samp_room": { "description": "The method used to sterilize the sampling room. This field accepts terms listed under electromagnetic radiation (http://purl.obolibrary.org/obo/ENVO_01001026). If the proper descriptor is not listed, please use text to describe the sampling room sterilization method. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "samp_source_mat_cat", "samp_type", "samp_stor_media", "samp_stor_device", "food_product_type", "IFSAC_category", "food_product_qual", "food_contact_surf" ], "title": "MimagFoodFoodProductionFacility", "type": "object" }, "MimagFoodHumanFoods": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the FoodHumanFoods Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food 
safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. 
In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. 
kmer and coverage, or reference database and kmer", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). 
Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. 
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_chem_add": { "description": "Any chemicals that are added to the fermentation process to achieve the desired final product", "items": { "type": "string" }, "type": "array" }, "ferm_chem_add_perc": { "description": "The amount of chemical added to the fermentation process", "items": { "type": "number" }, "type": "array" }, "ferm_headspace_oxy": { "description": "The amount of headspace oxygen in a fermentation vessel", "type": "number" }, "ferm_medium": { "description": "The growth medium used for the fermented food fermentation process, which supplies the required nutrients. Usually this includes a carbon and nitrogen source, water, micronutrients and chemical additives", "type": "string" }, "ferm_pH": { "description": "The pH of the fermented food fermentation process", "type": "number" }, "ferm_rel_humidity": { "description": "The relative humidity of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_temp": { "description": "The temperature of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_time": { "description": "The time duration of the fermented food fermentation process", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "ferm_vessel": { "description": "The type of vessel used for containment of the fermentation", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. 
This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. 
This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). 
Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. 
This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality; this date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. 
The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. 
specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready 
libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "microb_start": { "description": "Any type of microorganisms used in food production. This field accepts terms listed under live organisms for food production (http://purl.obolibrary.org/obo/FOODON_0344453)", "type": "string" }, "microb_start_count": { "description": "Total cell count of starter culture per gram, volume or area of sample and the method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example : total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "microb_start_inoc": { "description": "The amount of starter culture used to inoculate a new batch", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "microb_start_prep": { "description": "Information about the protocol or method used to prepare the starter inoculum", "type": "string" }, "microb_start_source": { "description": "The source from which the microbial starter culture was sourced. If commercially supplied, list supplier", "type": "string" }, "microb_start_taxID": { "description": "Please include Genus species and strain ID, if known of microorganisms used in food production. For complex communities, pipes can be used to separate two or more microbes", "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": "string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. 
Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. 
This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. 
Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. 
Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "food_product_type", "IFSAC_category" ], "title": "MimagFoodHumanFoods", "type": "object" }, "MimagHostAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the HostAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "blood_press_diast": { "description": "Resting diastolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "blood_press_syst": { "description": "Resting systolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. 
Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. 
Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung, etc.). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimagHostAssociated", "type": "object" }, "MimagHumanAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the HumanAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amniotic_fluid_color": { "description": "Specification of the color of the amniotic fluid sample", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling >= 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totaling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "blood_blood_disord": { "description": "History of blood disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, hematopoietic system disease (https://disease-ontology.org/?id=DOID:74)", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. antibiotics, N fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. 
Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diet_last_six_month": { "description": "Specification of major diet changes in the last six months, if yes the change should be specified", "type": "string" }, "drug_usage": { "description": "Any drug used by subject and the frequency of usage; can include multiple drugs used", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. 
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. 
https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "foetal_health_stat": { "description": "Specification of foetal health status, should also include abortion", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gestation_state": { "description": "Specification of the gestation state", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_hiv_stat": { "description": "HIV status of subject, if yes HAART initiation status should also be indicated as [YES or NO]", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "kidney_disord": { "description": "History of kidney disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, kidney disease (https://disease-ontology.org/?id=DOID:557)", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "maternal_health_stat": { "description": "Specification of the maternal health status", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_throat_disord": { "description": "History of nose-throat disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850), upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. 
The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pet_farm_animal": { "description": "Specification of presence of pets or farm animals in the environment of subject, if yes the animals should be specified; can include multiple animals present", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "pulmonary_disord": { "description": "History of pulmonary disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850)", "items": { "type": "string" }, "type": "array" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to samp_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "smoker": { "description": "Specification of smoking status", "type": "boolean" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "study_complt_stat": { "description": "Specification of study completion status, if no the reason should be specified", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "travel_out_six_month": { "description": "Specification of the countries travelled in the last six months; can include multiple travels", "items": { "type": "string" }, "type": "array" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "twin_sibling": { "description": "Specification of twin sibling presence", "type": "boolean" }, "urine_collect_meth": { "$ref": "#/$defs/URINECOLLECTMETHENUM", "description": "Specification of urine collection method" }, "urogenit_tract_disor": { "description": "History of urogenital tract disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" }, "weight_loss_3_month": { "description": "Specification of weight loss in the last three months, if yes should be further specified to include amount of weight loss", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimagHumanAssociated", "type": "object" }, "MimagHumanGut": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the HumanGut Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. 
For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, e.g. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gastrointest_disord": { "description": "History of gastrointestinal tract disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, gastrointestinal system disease (https://disease-ontology.org/?id=DOID:77)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "liver_disord": { "description": "History of liver disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, liver disease (https://disease-ontology.org/?id=DOID:409)", "items": { "type": "string" }, "type": "array" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to samp_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "special_diet": { "description": "Specification of special diet; can include multiple special diets", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimagHumanGut", "type": "object" }, "MimagHumanOral": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the HumanOral Extension", "properties": { "adapters": { 
"description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. 
kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). 
Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_mouth_teeth_throat_disord": { "description": "History of nose/mouth/teeth/throat disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, nose disease (https://disease-ontology.org/?id=DOID:2825), mouth disease (https://disease-ontology.org/?id=DOID:403), tooth disease (https://disease-ontology.org/?id=DOID:1091), or upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_last_toothbrush": { "description": "Specification of the time since last toothbrushing", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimagHumanOral", "type": "object" }, "MimagHumanSkin": { 
"additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the HumanSkin Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. 
In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. 
kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dermatology_disord": { "description": "History of dermatology disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, skin disease (https://disease-ontology.org/?id=DOID:37)", "items": { "type": "string" }, "type": "array" }, "dominant_hand": { "$ref": "#/$defs/DOMINANTHANDENUM", "description": "Dominant hand of the subject" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. 
https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. 
Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_since_last_wash": { "description": "Specification of the time since last wash", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimagHumanSkin", "type": "object" }, "MimagHumanVaginal": { 
"additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the HumanVaginal Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "birth_control": { "description": "Specification of birth control medication used", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. 
Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "douche": { "description": "Date of most recent douche", "format": "date-time", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. 
https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gynecologic_disord": { "description": "History of gynecological disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, female reproductive system disease (https://disease-ontology.org/?id=DOID:229)", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hrt": { "description": "Whether subject had hormone replacement therapy, and if yes start date", "format": "date-time", "type": "string" }, "hysterectomy": { "description": "Specification of whether hysterectomy was performed", "type": "boolean" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "medic_hist_perform": { "description": "Whether full medical history was 
collected", "type": "boolean" }, "menarche": { "description": "Date of most recent menstruation", "format": "date-time", "type": "string" }, "menopause": { "description": "Date of onset of menopause", "format": "date-time", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. 
The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pregnancy": { "description": "Date due of pregnancy", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sexual_act": { "description": "Current sexual partner and frequency of sex", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "urogenit_disord": { "description": "History of urogenital disorders, can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, reproductive system disease (https://disease-ontology.org/?id=DOID:15) or urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimagHumanVaginal", "type": "object" }, "MimagHydrocarbonResourcesCores": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the HydrocarbonResourcesCores Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. 
Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. MIC)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. 
The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling 
conditions should also be provided. (Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "owc_tvdss": { "description": "Depth of the original oil water contact (OWC) zone (average) (m TVDSS)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "permeability": { "description": "Measure of the ability of a hydrocarbon resource to allow fluids to pass through it. (Additional information: https://en.wikipedia.org/wiki/Permeability_(earth_sciences))", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "reservoir": { "description": "Name of the reservoir (e.g. 
Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_md": { "description": "In non deviated well, measured depth is equal to the true vertical depth, TVD (TVD=TVDSS plus the reference or datum it refers to). In deviated wells, the MD is the length of trajectory of the borehole measured from the same reference or datum. Common datums used are ground level (GL), drilling rig floor (DF), rotary table (RT), kelly bushing (KB) and mean sea level (MSL). If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_tvdss": { "description": "Depth of the sample i.e. the vertical distance between the sea level and the sampled position in the subsurface. Depth can be reported as an interval for subsurface samples e.g. 1325.75-1362.25 m", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. 
For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sr_dep_env": { "$ref": "#/$defs/SRDEPENVENUM", "description": "Source rock depositional environment (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of source rock (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_kerog_type": { "$ref": "#/$defs/SRKEROGTYPEENUM", "description": "Origin of kerogen. Type I: Algal (aquatic), Type II: planktonic and soft plant material (aquatic or terrestrial), Type III: terrestrial woody/ fibrous plant material (terrestrial), Type IV: oxidized recycled woody debris (terrestrial) (additional information: https://en.wikipedia.org/wiki/Kerogen). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_lithology": { "$ref": "#/$defs/SRLITHOLOGYENUM", "description": "Lithology of source rock (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. 
Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "hcr", "hc_produced", "basin", "hcr_temp", "sulfate_fw", "vfa_fw", "samp_type", "api" ], "title": "MimagHydrocarbonResourcesCores", "type": "object" }, "MimagHydrocarbonResourcesFluidsSwabs": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the HydrocarbonResourcesFluidsSwabs Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "add_recov_method": { "description": "Additional (i.e. Secondary, tertiary, etc.) recovery methods deployed for increase of hydrocarbon recovery from resource and start date for each one of them. If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. 
It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biocide": { "description": "List of biocides (commercial name of product and supplier) and date of administration", "type": "string" }, "biocide_admin_method": { "description": "Method of biocide administration (dose, frequency, duration, time elapsed between last biociding and sampling) (e.g. 150 mg/l; weekly; 4 hr; 3 days)", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_treat_method": { "description": "Method of chemical administration(dose, frequency, duration, time elapsed between administration and sampling) (e.g. 50 mg/l; twice a week; 1 hr; 0 days)", "type": "string" }, "chem_treatment": { "description": "List of chemical compounds administered upstream the sampling location where sampling occurred (e.g. Glycols, H2S scavenger, corrosion and scale inhibitors, demulsifiers, and other production chemicals etc.). The commercial name of the product and name of the supplier should be provided. The date of administration should also be included", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively.
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxgen-corrosion and microbial activity (e.g. Mic)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483).
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. 
Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "iw_bt_date_well": { "description": "Injection water breakthrough date per well following a secondary and/or tertiary recovery", "format": "date-time", "type": "string" }, "iwf": { "description": "Proportion of the produced fluids derived from injected water at the time of sampling. (e.g. 
87%)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling 
conditions should also be provided. (Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (soure: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_rate": { "description": "Oil and/or gas production rates per well (e.g. 524 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_start_date": { "description": "Date of field's first production", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_point": { "$ref": "#/$defs/SAMPCOLLECTPOINTENUM", "description": "Sampling point on the asset where the sample was collected (e.g. Wellhead, storage tank, separator, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_loc_corr_rate": { "description": "Metal corrosion rate is the speed of metal deterioration due to environmental conditions. As environmental conditions change corrosion rates change accordingly.
Therefore, long term corrosion rates are generally more informative than short term rates and for that reason they are preferred during reporting. In the case of suspected MIC, corrosion rate measurements at the time of sampling might provide insights into the involvement of certain microbial community members in MIC as well as potential microbial interplays", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_preserv": { "description": "Preservative added to the sample (e.g. Rnalater, alcohol, formaldehyde, etc.). Where appropriate include volume added (e.g. Rnalater; 2 ml)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material.
This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "water_cut": { "description": "Current amount of water (%) in a produced fluid stream; or the average of the combined streams", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_prod_rate": { "description": "Water production rates per well (e.g. 987 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "hcr", "hc_produced", "basin", "water_cut", "iwf", "add_recov_method", "samp_type", "samp_collect_point", "sulfate", "sulfide", "nitrate", "api" ], "title": "MimagHydrocarbonResourcesFluidsSwabs", "type": "object" }, "MimagMicrobialMatBiofilm": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the MicrobialMatBiofilm Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. 
kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. Maybe be a single taxon or mixed taxa sample. Use 'synthetic metagenome for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimagMicrobialMatBiofilm", "type": "object" }, "MimagMiscellaneousNaturalOrArtificialEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the MiscellaneousNaturalOrArtificialEnvironment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. 
Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. 
Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. 
kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimagMiscellaneousNaturalOrArtificialEnvironment", "type": "object" }, "MimagPlantAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the PlantAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp_regm": { "description": "Information about treatment involving an exposure to varying temperatures; should include the temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include different temperature regimens", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "antibiotic_regm": { "description": "Information about treatment involving antibiotic administration; should include the name of antibiotic, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple antibiotic regimens", "items": { "type": "string" }, "type": "array" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. 
Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_mutagen": { "description": "Treatment involving use of mutagens; should include the name of mutagen, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mutagen regimens", "items": { "type": "string" }, "type": "array" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "cult_root_med": { "description": "Name or reference for the hydroponic or in vitro culture rooting medium; can be the name of a commonly used medium or reference to a specific medium, e.g. Murashige and Skoog medium. If the medium has not been formally published, use the rooting medium descriptors", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_regm": { "description": "Information about treatment involving the use of fertilizers; should include the name of fertilizer, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fertilizer regimens", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name 
of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravity": { "description": "Information about treatment involving use of gravity factor to study various types of responses in presence, absence or modified levels of gravity; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple treatments", "items": { "type": "string" }, "type": "array" }, "growth_facil": { "description": "Type of facility where the sampled plant was grown; controlled vocabulary: growth chamber, open top chamber, glasshouse, experimental garden, field. 
Alternatively use Crop Ontology (CO) terms, see http://www.cropontology.org/ontology/CO_715/Crop%20Research", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_hormone_regm": { "description": "Information about treatment involving use of growth hormones; should include the name of growth hormone, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple growth hormone regimens", "items": { "type": "string" }, "type": "array" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_wet_mass": { "description": "Measurement of wet mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity_regm": { "description": "Information about treatment involving an exposure to varying degrees of humidity; should include amount of humidity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_regm": { "description": "Information about treatment(s) involving exposure to light, including both light intensity and quality", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "mineral_nutr_regm": { "description": "Information about treatment involving the use of mineral supplements; should include the name of mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of ph of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimen", "items": { "type": "string" }, "type": "array" }, 
"plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_sex": { "$ref": "#/$defs/PLANTSEXENUM", "description": "Sex of the reproductive parts on the whole plant, e.g. pistillate, staminate, monoecieous, hermaphrodite" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "radiation_regm": { "description": "Information about treatment involving exposure of plant or a plant part to a particular radiation regimen; should include the radiation type, amount or intensity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple radiation regimens", "items": { "type": "string" }, "type": "array" }, "rainfall_regm": { "description": "Information about treatment involving an exposure to a given amount of rainfall, treatment 
regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 
5.5", "type": "number" }, "root_med_regl": { "description": "Growth regulators in the culture rooting medium such as cytokinins, auxins, gibberellins, abscisic acid; e.g. 0.5 mg/L NAA", "type": "string" }, "root_med_solid": { "description": "Specification of the solidifying agent in the culture rooting medium; e.g. agar", "type": "string" }, "root_med_suppl": { "description": "Organic supplements of the culture rooting medium, such as vitamins, amino acids, organic acids, antibiotics, activated charcoal; e.g. nicotinic acid (0.5 mg/L)", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tiss_cult_growth_med": { "description": "Description of plant tissue culture growth media used", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include 
multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimagPlantAssociated", "type": "object" }, "MimagSediment": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the Sediment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. 
Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. 
Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. 
in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "particle_class": { "description": "Particles are classified, based on their size, into six general categories:clay, silt, sand, gravel, cobbles, and boulders; should include amount of particle preceded by the name of the particle type; can include multiple values", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sediment_type": { "$ref": "#/$defs/SEDIMENTTYPEENUM", "description": "Information about the sediment type based on major constituents" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimagSediment", "type": "object" }, "MimagSoil": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the Soil Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "al_sat": { "description": "Aluminum saturation (esp. For tropical soils)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "al_sat_meth": { "description": "Reference or method used in determining Al saturation", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. 
It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e.
kmer and coverage, or reference database and kmer", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. 
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner.
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals": { "description": "Heavy metals present in the sequenced sample and their concentrations. 
For multiple heavy metals and concentrations, add multiple copies of this field", "items": { "type": "string" }, "type": "array" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "link_addit_analys": { "description": "Link to additional analysis results performed on the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", 
"type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string"
}, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique.
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "water_cont_soil_meth": { "description": "Reference or method used in determining the water content of soil", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "elev", "env_broad_scale" ], "title": "MimagSoil", "type": "object" }, "MimagSymbiontAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the SymbiontAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "association_duration": { "description": "Time spent in host of the symbiotic organism at the time of sampling; relevant scale depends on symbiotic organism and study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. 
Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). 
Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_cellular_loc": { "$ref": "#/$defs/HOSTCELLULARLOCENUM", "description": "The localization of the symbiotic host organism within the host from which it was sampled: e.g. intracellular if the symbiotic host organism is localized within the cells or extracellular if the symbiotic host organism is localized outside of cells" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_dependence": { "$ref": "#/$defs/HOSTDEPENDENCEENUM", "description": "Type of host dependence for the symbiotic host organism to its host" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_infra_spec_name": { "description": "Taxonomic information about the host below subspecies level", "type": "string" }, "host_infra_spec_rank": { "description": "Taxonomic rank information about the host below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_number": { "description": "Number of symbiotic host individuals pooled at the time of collection", "type": "string" }, "host_of_host_coinf": { "description": "The taxonomic name of any coinfecting organism observed in a symbiotic relationship with the host of the sampled host organism. e.g. where a sample collected from a host trematode species (A) which was collected from a host_of_host fish (B) that was also infected with a nematode (C), the value here would be (C) the nematode {species name} or {common name}. Multiple co-infecting species may be added in a comma-separated list. For listing symbiotic organisms associated with the host (A) use the term Observed host symbiont", "type": "string" }, "host_of_host_disease": { "description": "List of diseases with which the host of the symbiotic host organism has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_loc": { "description": "For a symbiotic host organism the local anatomical environment within its host may have causal influences. 
Report the anatomical entity(s) which are in the direct environment of the symbiotic host organism being sampled and which you believe have significant causal influences on your sample or specimen. For example, if the symbiotic host organism being sampled is an intestinal worm, its local environmental context will be the term for intestine from UBERON (http://uberon.github.io/)", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_med": { "description": "Report the environmental material(s) immediately surrounding the symbiotic host organism at the time of sampling. This usually will be a tissue or substance type from the host, but may be another material if the symbiont is external to the host. We recommend using classes from the UBERON ontology, but subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483) may also be used. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
intestines, heart)", "type": "string" }, "host_of_host_fam_rel": { "description": "Familial relationship of the host of the symbiotic host organisms to other hosts of symbiotic host organism in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_of_host_geno": { "description": "Observed genotype of the host of the symbiotic host organism", "type": "string" }, "host_of_host_gravid": { "description": "Whether or not the host of the symbiotic host organism is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_of_host_infname": { "description": "Taxonomic name information of the host of the symbiotic host organism below subspecies level", "type": "string" }, "host_of_host_infrank": { "description": "Taxonomic rank information about the host of the symbiotic host organism below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_of_host_name": { "description": "Common name of the host of the symbiotic host organism", "type": "string" }, "host_of_host_pheno": { "description": "Phenotype of the host of the symbiotic host organism. For phenotypic quality ontology (PATO) terms, see http://purl.bioontology.org/ontology/pato", "type": "string" }, "host_of_host_sub_id": { "description": "A unique identifier by which each host of the symbiotic host organism subject can be referred to, de-identified, e.g. #H14", "type": "string" }, "host_of_host_taxid": { "description": "NCBI taxon id of the host of the symbiotic host organism", "type": "string" }, "host_of_host_totmass": { "description": "Total mass of the host of the symbiotic host organism at collection, the unit depends on the host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_specificity": { "$ref": "#/$defs/HOSTSPECIFICITYENUM", "description": "Level of specificity of symbiont-host interaction: e.g. generalist (symbiont able to establish associations with distantly related hosts) or species-specific" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "mode_transmission": { "$ref": "#/$defs/MODETRANSMISSIONENUM", "description": "The process through which the symbiotic host organism entered the host from which it was sampled" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "route_transmission": { "$ref": "#/$defs/ROUTETRANSMISSIONENUM", "description": "Description of path taken by the symbiotic host organism being sampled in order to establish a symbiotic relationship with the host (with which it was observed at the time of sampling) via a mode of transmission (specified in mode_transmission)" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_sol": { "description": "Solution within which sample was stored, if any", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sym_life_cycle_type": { "$ref": "#/$defs/SYMLIFECYCLETYPEENUM", "description": "Type of life cycle of the symbiotic host species (the thing being sampled). 
Simple life cycles occur within a single host, complex ones within multiple different hosts over the course of their normal life cycle" }, "symbiont_host_role": { "$ref": "#/$defs/SYMBIONTHOSTROLEENUM", "description": "Role of the host in the life cycle of the symbiotic organism" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "type_of_symbiosis": { "$ref": "#/$defs/TYPEOFSYMBIOSISENUM", "description": "Type of biological interaction established between the symbiotic host organism being sampled and its respective host" }, "urobiom_sex": { "$ref": "#/$defs/UROBIOMSEXENUM", "description": "Physical sex of the host" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", 
"env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "host_dependence", "sym_life_cycle_type", "host_life_stage" ], "title": "MimagSymbiontAssociated", "type": "object" }, "MimagWastewaterSludge": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the WastewaterSludge Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biochem_oxygen_dem": { "description": "Amount of dissolved oxygen needed by aerobic biological organisms in a body of water to break down organic material present in a given water sample at certain temperature over a specific time period", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_oxygen_dem": { "description": "A measure of the capacity of water to consume oxygen during the decomposition of organic matter and the oxidation of inorganic chemicals such as ammonia and nitrite", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. 
The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "efficiency_percent": { "description": "Percentage of volatile solids removed from the anaerobic digestor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "emulsions": { "description": "Amount or concentration of substances such as paints, adhesives, mayonnaise, hair colorants, emulsified oils, etc.; can include multiple emulsion types", "items": { "type": "string" }, "type": "array" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. 
air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gaseous_substances": { "description": "Amount or concentration of substances such as hydrogen sulfide, carbon dioxide, methane, etc.; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "indust_eff_percent": { "description": "Percentage of industrial effluents received by wastewater treatment plant", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "inorg_particles": { "description": "Concentration of particles such as sand, grit, metal particles, ceramics, etc.; can include multiple particles", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_particles": { "description": "Concentration of particles such as faeces, hairs, food, vomit, paper fibers, plant material, humus, etc", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. 
The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pre_treatment": { "description": "The process of pre-treatment removes materials that can be easily collected from the raw wastewater", "type": "string" }, "primary_treatment": { "description": "The process to produce both a generally homogeneous liquid capable of being treated biologically and a sludge that can be separately treated or processed", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reactor_type": { "description": "Anaerobic digesters can be designed and engineered to operate using a number of different process configurations, as batch or continuous, mesophilic, high solid or low solid, and single stage or multistage", "type": "string" 
}, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "secondary_treatment": { "description": "The process for substantially degrading the biological content of the sewage", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sewage_type": { "description": "Type of wastewater treatment plant as municipal or industrial", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sludge_retent_time": { "description": "The time activated sludge remains in reactor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_inorg_mat": { "description": "Concentration of substances such as ammonia, road-salt, sea-salt, cyanide, hydrogen sulfide, thiocyanates, thiosulfates, etc", "items": { "type": "string" }, "type": "array" }, "soluble_org_mat": { "description": "Concentration of substances such as urea, fruit sugars, soluble proteins, drugs, pharmaceuticals, etc", "items": { "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tertiary_treatment": { "description": "The process providing a final treatment stage to raise the effluent quality before it is discharged to the receiving environment", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "wastewater_type": { "description": "The origin of wastewater such as human waste, rainfall, storm drains, etc", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimagWastewaterSludge", "type": "object" }, "MimagWater": { "additionalProperties": false, "description": "MIxS data that complies with the Mimag checklist and the Water Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. 
For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "atmospheric_data": { "description": "Measurement of atmospheric data; can include multiple data", "items": { "type": "string" }, "type": "array" }, "bac_prod": { "description": "Bacterial production in the water column measured by isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bac_resp": { "description": "Measurement of bacterial respiration in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, e.g. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_nitro": { "description": "Concentration of dissolved inorganic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "down_par": { "description": "Visible waveband radiance and irradiance measurements in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). 
We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fluor": { "description": "Raw or converted fluorescence of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_intensity": { "description": "Measurement of light intensity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "part_org_nitro": { "description": "Concentration of particulate organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "photon_flux": { "description": "Measurement of photon flux", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "primary_prod": { "description": "Measurement of primary production, generally measured as isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_react_phosp": { "description": "Concentration of soluble reactive phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_part_matter": { "description": "Concentration of suspended particulate matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_diss_nitro": { "description": "Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_inorg_nitro": { "description": "Total inorganic nitrogen content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_part_carb": { "description": "Total particulate carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "bin_param", "bin_software", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimagWater", "type": "object" }, "MimarksCAgriculture": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the Agriculture Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. 
A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. 
This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON:03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; information about treatment involving use of growth hormones; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer 
or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of ph 
of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "photosynt_activ": { "description": "Measurement of photosynthetic activity (i.e. leaf gas exchange / chlorophyll fluorescence emissions / reflectance / transpiration). Please also include the method term detailing the method of activity measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "photosynt_activ_meth": { "description": "Reference or method used in measurement of photosynthetic activity", "items": { "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). 
If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. 
under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. 
Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_location": { "description": "The location the sequencing run was performed. 
Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of some soil.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_pH": { "description": "pH of some soil.", "type": "number" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. 
identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "water_pH": { "description": "The pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_shared": { "description": "Other users sharing access to the same water source. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "pcr_primers", "nucl_acid_amp", "target_subfragment", "temp", "pcr_cond", "nucl_acid_ext", "isol_growth_condt", "project_name", "chimera_check", "env_local_scale", "samp_mat_process", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene", "chem_administration", "food_source", "specific_host", "pathogenicity", "samp_store_dur", "samp_store_loc", "host_age", "host_common_name", "host_genotype", "host_height", "host_length", "host_life_stage", "host_phenotype", "host_taxid", "host_tot_mass", "host_spec_range", "lib_reads_seqd", "lib_vector", "lib_screen", "mid", "adapters", 
"assembly_name", "soil_type", "soil_type_meth", "store_cond", "microbial_biomass", "micro_biomass_meth", "sieving", "pool_dna_extracts" ], "title": "MimarksCAgriculture", "type": "object" }, "MimarksCAir": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the Air Extension", "properties": { "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "barometric_press": { "description": "Force per unit area exerted against a surface by the weight of air above that surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_monoxide": { "description": "Carbon monoxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "oxygen": { "description": "Oxygen (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pollutants": { "description": "Pollutant types and, amount or concentrations measured at the time of sampling; can report multiple pollutants by entering numeric values preceded by name of pollutant", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. 
More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. 
ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "volatile_org_comp": { "description": "Concentration of carbon-based chemicals that easily evaporate at room temperature; can report multiple volatile organic compounds by entering numeric values preceded by name of compound", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "alt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene" ], "title": "MimarksCAir", "type": "object" }, "MimarksCBuiltEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the BuiltEnvironment Extension", "properties": { "abs_air_humidity": { "description": "Actual mass of water vapor - mh20 - present in the air water vapor mixture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "address": { "description": "The street name and building number where the sampling occurred", "type": "string" }, "adj_room": { "description": "List of rooms (room number, room name) immediately adjacent to the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "aero_struc": { "$ref": "#/$defs/AEROSTRUCENUM", "description": "Aerospace structures typically consist of thin plates with stiffeners for the external surfaces, bulkheads and frames to support the shape and fasteners such as welds, rivets, screws and bolts to hold the components together" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amount_light": { "description": "The unit of illuminance and luminous emittance, measuring luminous flux per unit area", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "arch_struc": { "$ref": "#/$defs/ARCHSTRUCENUM", "description": "An architectural structure is a human-made, free-standing, immobile outdoor construction" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_dew_point": { "description": "The average of dew point measures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "avg_temp": { "description": "The average of temperatures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bathroom_count": { "description": "The number of bathrooms in the building", "type": "integer" }, "bedroom_count": { "description": "The number of bedrooms in the building", "type": "integer" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "build_docs": { "$ref": "#/$defs/BUILDDOCSENUM", "description": "The building design, construction and operation documents" }, "build_occup_type": { "description": "The primary function for which a building or discrete part of a building is intended to be used", "items": { "$ref": "#/$defs/BUILDOCCUPTYPEENUM" }, "type": "array" }, "building_setting": { "$ref": "#/$defs/BUILDINGSETTINGENUM", "description": "A location (geography) where a building is set" }, "built_struc_age": { "description": "The age of the built structure since construction", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "built_struc_set": { "$ref": "#/$defs/BUILTSTRUCSETENUM", "description": "The characterization of the location of the built structure as high or low human density" }, "built_struc_type": { "description": "A physical structure that is a body or assemblage of bodies in space to form a system capable of supporting loads", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_area": { "description": "The area of the ceiling space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the ceiling at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "ceil_finish_mat": { "$ref": "#/$defs/CEILFINISHMATENUM", "description": "The type of material used to finish a ceiling" }, "ceil_struc": { "$ref": "#/$defs/CEILSTRUCENUM", "description": "The construction format of the ceiling" }, "ceil_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a ceiling surface" }, "ceil_thermal_mass": { "description": "The ability of the ceiling to provide inertia against temperature fluctuations. Generally this means concrete that is exposed. A metal deck that supports a concrete slab will act thermally as long as it is exposed to room air flow", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_type": { "$ref": "#/$defs/CEILTYPEENUM", "description": "The type of ceiling according to the ceiling's appearance or construction" }, "ceil_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the ceiling" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cool_syst_id": { "description": "The cooling system identifier", "type": "integer" }, "date_last_rain": { "description": "The date of the last time it rained", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dew_point": { "description": "The temperature to which a given parcel of humid air must be cooled, at constant barometric pressure, for water vapor to condense into water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_comp_type": { "$ref": "#/$defs/DOORCOMPTYPEENUM", "description": "The composite type of the door" }, "door_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the door" }, "door_direct": { "$ref": "#/$defs/DOORDIRECTENUM", "description": "The direction the door opens" }, "door_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the door in the room" }, "door_mat": { "$ref": "#/$defs/DOORMATENUM", "description": "The material the door is composed of" }, "door_move": { "$ref": "#/$defs/DOORMOVEENUM", "description": "The type of movement of the door" }, "door_size": { "description": "The size of the door", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_type": { "$ref": "#/$defs/DOORTYPEENUM", "description": "The type of door material" }, "door_type_metal": { "$ref": "#/$defs/DOORTYPEMETALENUM", "description": "The type of metal door" }, "door_type_wood": { "description": "The type of wood door", "type": "string" }, "door_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a door" }, "drawings": { "$ref": "#/$defs/DRAWINGSENUM", "description": "The building's architectural drawings; if design is chosen, indicate phase-conceptual, schematic, design development, and construction documents" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elevator": { "description": "The number of elevators within the built structure", "type": "integer" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "escalator": { "description": "The number of escalators within the built structure", "type": "integer" }, "exp_duct": { "description": "The amount of exposed ductwork in the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "exp_pipe": { "description": "The number of exposed pipes in the room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "ext_door": { "description": "The number of exterior doors in the built structure", "type": "integer" }, "ext_wall_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The orientation of the exterior wall" }, "ext_window_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The compass direction the exterior window of the room is facing" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "filter_type": { "description": "A device which removes solid particulates or airborne molecular contaminants", "items": { "$ref": "#/$defs/FILTERTYPEENUM" }, "type": "array" }, "fireplace_type": { "$ref": "#/$defs/FIREPLACETYPEENUM", "description": "A firebox with chimney" }, "floor_age": { "description": "The time period since installment of the carpet or flooring", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_area": { "description": "The area of the floor space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the floor at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "floor_count": { "description": "The number of floors in the building, including basements and mechanical penthouse", "type": "integer" }, "floor_finish_mat": { "description": "The floor covering type; the finished surface that is walked on", "type": "string" }, "floor_struc": { "$ref": "#/$defs/FLOORSTRUCENUM", "description": "Refers to the structural elements and subfloor upon which the finish flooring is installed" }, "floor_thermal_mass": { "description": "The ability of the floor to provide inertia against temperature fluctuations", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_water_mold": { "$ref": "#/$defs/FLOORWATERMOLDENUM", "description": "Signs of the presence of mold or mildew in a room" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "freq_cook": { "description": "The number of times a meal is cooked per week", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "furniture": { "$ref": "#/$defs/FURNITUREENUM", "description": "The types of furniture present in the sampled room" }, "gender_restroom": { "$ref": "#/$defs/GENDERRESTROOMENUM", "description": "The gender type of the restroom" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hall_count": { "description": "The total count of hallways and corridors in the built structure", "type": "integer" }, "handidness": { "$ref": "#/$defs/HANDIDNESSENUM", "description": "The handedness of the individual sampled" }, "heat_cool_type": { "description": "Methods of conditioning or heating a room or building", "items": { "$ref": "#/$defs/HEATCOOLTYPEENUM" }, "type": "array" }, "heat_deliv_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The location of heat delivery within the room" }, "heat_sys_deliv_meth": { "$ref": "#/$defs/HEATSYSDELIVMETHENUM", "description": "The method by which the heat is delivered through the system" }, "heat_system_id": { "description": "The heating system identifier", "type": "integer" }, "height_carper_fiber": { "description": "The average carpet fiber height in the indoor environment", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "indoor_space": { "$ref": "#/$defs/INDOORSPACEENUM", "description": "A distinguishable space within a structure, the purpose for which discrete areas of a building is used" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "inside_lux": { "description": "The recorded value at sampling time (power density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "int_wall_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the wall at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "last_clean": { "description": "The last time the floor was cleaned (swept, mopped, vacuumed)", "format": "date-time", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "light_type": { "description": "Application of light to achieve some practical or aesthetic effect. Lighting includes the use of both artificial light sources such as lamps and light fixtures, as well as natural illumination by capturing daylight. 
Can also include absence of light", "items": { "$ref": "#/$defs/LIGHTTYPEENUM" }, "type": "array" }, "max_occup": { "description": "The maximum amount of people allowed in the indoor environment", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mech_struc": { "$ref": "#/$defs/MECHSTRUCENUM", "description": "mechanical structure: a moving structure" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_pets": { "description": "The number of pets residing in the sampled space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_plants": { "description": "The number of plant(s) in the sampling space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_resident": { "description": "The number of individuals currently occupying in the sampling location", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "occup_density_samp": { "description": "Average number of occupants at time of sampling per square footage", "type": "number" }, 
"occup_document": { "$ref": "#/$defs/OCCUPDOCUMENTENUM", "description": "The type of documentation of occupancy" }, "occup_samp": { "description": "Number of occupants present at time of sample within the given space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. 
The primer sequence should be reported in uppercase letters", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "quad_pos": { "$ref": "#/$defs/QUADPOSENUM", "description": "The quadrant position of the sampling room within the building" }, "rel_air_humidity": { "description": "Partial vapor and air pressure, density of the vapor and air, or by the actual mass of the vapor and air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "rel_humidity_out": { "description": "The recorded outside relative humidity value at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_samp_loc": { "$ref": "#/$defs/RELSAMPLOCENUM", "description": "The sampling location within the train car" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "room_air_exch_rate": { "description": "The rate at which outside air replaces indoor air in a given space", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_architec_elem": { "description": "The unique details and component parts that, together, form the architecture of a distinguishable space within a built structure", "type": "string" }, "room_condt": { "$ref": "#/$defs/ROOMCONDTENUM", "description": "The condition of the room at the time of sampling" }, "room_connected": { "$ref": "#/$defs/ROOMCONNECTEDENUM", "description": "List of rooms connected to the sampling room by a doorway" }, "room_count": { "description": "The total count of rooms in the built structure including all room types", "type": "integer" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "room_door_dist": { "description": "Distance between doors (meters) in the hallway between the sampling room and adjacent rooms", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_door_share": { "description": "List of room(s) (room number, room name) sharing a door with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_hallway": { "description": "List of room(s) (room number, room name) located in the same hallway as sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_loc": { "$ref": "#/$defs/ROOMLOCENUM", "description": "The position of the room within the building" }, "room_moist_dam_hist": { "description": "The history of moisture damage or mold in the past 12 months. Number of events of moisture damage or mold observed", "type": "integer" }, "room_net_area": { "description": "The net floor area of sampling room. 
Net area excludes wall thicknesses", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_occup": { "description": "Count of room occupancy at time of sampling", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_samp_pos": { "$ref": "#/$defs/ROOMSAMPPOSENUM", "description": "The horizontal sampling position in the room relative to architectural elements" }, "room_type": { "description": "The main purpose or activity of the sampling room. A room is any distinguishable space within a structure", "type": "string" }, "room_vol": { "description": "Volume of sampling room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_wall_share": { "description": "List of room(s) (room number, room name) sharing a wall with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_window_count": { "description": "Number of windows in the room", "type": "integer" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_sort_meth": { "description": "Method by which samples are sorted; open face filter collecting total suspended particles, prefilter to remove particles larger than X micrometers in diameter, where common values of X would be 10 and 2.5 full size sorting in a cascade impactor", "items": { "type": "string" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_time_out": { "description": "The recent and long term history of outside sampling", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_weather": { "$ref": "#/$defs/SAMPWEATHERENUM", "description": "The weather on the sampling day" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_use": { "$ref": "#/$defs/SEASONUSEENUM", "description": "The seasons the space is occupied" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "shad_dev_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the shading device" }, "shading_device_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the shading device at the time of sampling" }, "shading_device_loc": { "$ref": "#/$defs/SHADINGDEVICELOCENUM", "description": "The location of the shading device in relation to the built structure" }, "shading_device_mat": { "description": "The material the shading device is composed of", "type": "string" }, "shading_device_type": { "$ref": "#/$defs/SHADINGDEVICETYPEENUM", "description": "The type of shading device" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "space_typ_state": { "$ref": "#/$defs/SPACETYPSTATEENUM", "description": "Customary or normal state of the space" }, "specific": { "$ref": "#/$defs/SPECIFICENUM", "description": "The building specifications. If design is chosen, indicate phase: conceptual, schematic, design development, construction documents" }, "specific_humidity": { "description": "The mass of water vapour in a unit mass of moist air, usually expressed as grams of vapour per kilogram of air, or, in air conditioning, as grains per pound", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "substructure_type": { "description": "The substructure or under building is that largely hidden section of the building which is built off the foundations to the ground floor level", "items": { "$ref": "#/$defs/SUBSTRUCTURETYPEENUM" }, "type": "array" }, "surf_air_cont": { "description": "Contaminant identified on surface", "items": { "$ref": "#/$defs/SURFAIRCONTENUM" }, "type": "array" }, "surf_humidity": { "description": "Surfaces: water activity as a function of air and material moisture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "surf_moisture": { "description": "Water held on a surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "surf_moisture_ph": { "description": "ph measurement of surface", "type": "number" }, "surf_temp": { "description": "Temperature of the surface at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp_out": { "description": "The recorded temperature value at sampling time outside", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "train_line": { "$ref": "#/$defs/TRAINLINEENUM", "description": "The subway line name" }, "train_stat_loc": { "$ref": "#/$defs/TRAINSTATLOCENUM", "description": "The train station collection location" }, "train_stop_loc": { "$ref": "#/$defs/TRAINSTOPLOCENUM", "description": "The train stop collection location" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "typ_occup_density": { "description": "Customary or normal density of occupants", "type": "number" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "vis_media": { "description": "The building visual media", "type": "string" }, "wall_area": { "description": "The total area of the sampled room's walls", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_const_type": { "$ref": "#/$defs/WALLCONSTTYPEENUM", "description": "The building class of the wall defined by the composition of the building elements and fire-resistance rating" }, "wall_finish_mat": { "$ref": "#/$defs/WALLFINISHMATENUM", "description": "The material utilized to finish the outer most layer of the wall" }, "wall_height": { "description": "The average height of the walls in the sampled room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the wall within the room" }, "wall_surf_treatment": { "$ref": "#/$defs/WALLSURFTREATMENTENUM", "description": "The surface treatment of interior wall" }, "wall_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a wall surface" }, "wall_thermal_mass": { "description": "The ability of the wall to provide inertia against temperature fluctuations. Generally this means concrete or concrete block that is either exposed or covered only with paint", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a wall" }, "water_feat_size": { "description": "The size of the water feature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_feat_type": { "$ref": "#/$defs/WATERFEATTYPEENUM", "description": "The type of water feature present within the building being sampled" }, "weekday": { "$ref": "#/$defs/WEEKDAYENUM", "description": "The day of the week when sampling occurred" }, "window_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the window at the time of sampling" }, "window_cover": { "$ref": "#/$defs/WINDOWCOVERENUM", "description": "The type of window covering" }, "window_horiz_pos": { "$ref": "#/$defs/WINDOWHORIZPOSENUM", "description": "The horizontal position of the window on the wall" }, "window_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the window within the room" }, "window_mat": { "$ref": "#/$defs/WINDOWMATENUM", "description": "The type of material used to finish a window" }, "window_open_freq": { "description": "The number of times windows are opened per week", "type": "integer" }, "window_size": { "description": "The window's length and width", "type": "string" }, "window_status": { "$ref": "#/$defs/WINDOWSTATUSENUM", "description": "Defines whether the windows were open or closed during environmental testing" }, "window_type": { "$ref": "#/$defs/WINDOWTYPEENUM", "description": "The type of windows" }, "window_vert_pos": { "$ref": "#/$defs/WINDOWVERTPOSENUM", "description": "The vertical position of the window on the wall" }, "window_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the window" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene", "surf_material", "rel_air_humidity", "abs_air_humidity", "air_temp", "build_occup_type", "carb_dioxide", "ventilation_type", "organism_count", "indoor_space", "indoor_surf", "filter_type", 
"heat_cool_type", "building_setting", "light_type", "space_typ_state", "typ_occup_density", "occup_samp", "occup_density_samp" ], "title": "MimarksCBuiltEnvironment", "type": "object" }, "MimarksCFoodAnimalAndAnimalFeed": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the FoodAnimalAndAnimalFeed Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "animal_am": { "description": "The name(s) (generic or brand) of the antimicrobial(s) given to the food animal within the last 30 days", "type": "string" }, "animal_am_dur": { "description": "The duration of time (days) that the antimicrobial was administered to the food animal", "type": "string" }, "animal_am_freq": { "description": "The frequency per day that the antimicrobial was administered to the food animal", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "animal_am_route": { "description": "The route by which the antimicrobial is administered into the body of the food animal", "type": "string" }, "animal_am_use": { "description": "The prescribed intended use of or the condition treated by the antimicrobial given to the food animal by any route of administration", "type": "string" }, "animal_body_cond": { "$ref": "#/$defs/ANIMALBODYCONDENUM", "description": "Body condition scoring is a production management tool used to evaluate overall health and nutritional needs of a food animal. Because there are different scoring systems, this field is restricted to three categories" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes |Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. 
If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_sex": { "$ref": "#/$defs/ANIMALSEXENUM", "description": "The sex and reproductive status of the food animal" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. 
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. 
This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. 
http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). 
If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997).
If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality; this date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_source_age": { "description": "The age of the food source host organism. Depending on the type of host organism, age may be more appropriate to report in days, weeks, or years", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064).
If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed.
This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). 
This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe?
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells.
Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. 
This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed.
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. 
Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. 
Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. 
Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. 
identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene", "coll_site_geo_feat", "food_origin", "food_prod", "food_product_type", "IFSAC_category", "intended_consumer", "samp_purpose" ], "title": "MimarksCFoodAnimalAndAnimalFeed", "type": "object" }, "MimarksCFoodFarmEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the FoodFarmEnvironment Extension", "properties": { "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). 
Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "air_flow_impede": { "description": "Presence of objects in the area that would influence or impede air flow through the air filter", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes. Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_intrusion": { "description": "Identification of animals intruding on the sample or sample site including invertebrates (such as pests or pollinators) and vertebrates (such as wildlife or domesticated animals). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "date_extr_weath": { "description": "Date of unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "format": "date-time", "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extr_weather_event": { "description": "Unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "$ref": "#/$defs/EXTRWEATHEREVENTENUM" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. 
This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_san_freq": { "description": "The number of times farm equipment is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually", "type": "string" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fertilizer_date": { "description": "Date of administration of soil amendment or fertilizer. 
Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "format": "date-time", "type": "string" }, "food_clean_proc": { "$ref": "#/$defs/FOODCLEANPROCENUM", "description": "The process of cleaning food to separate other environmental materials from the food source. Multiple terms can be separated by pipes" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. 
This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality; this date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. 
This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_medium": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). The name of the medium used to grow the microorganism", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. 
The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. 
Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_part_maturity": { "description": "A description of the stage of development of a plant or plant part based on maturity or ripeness. This field accepts terms listed under degree of plant maturity (http://purl.obolibrary.org/obo/FOODON_03530050)", "type": "string" }, "plant_reprod_crop": { "description": "Plant reproductive part used in the field during planting to start the crop", "items": { "$ref": "#/$defs/PLANTREPRODCROPENUM" }, "type": "array" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free. Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. 
This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. 
This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of soil at time of sampling.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_pH": { "description": "The pH of soil at time of sampling.", "type": "number" }, "soil_porosity": { "description": "Porosity of soil or deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_class": { "$ref": "#/$defs/SOILTEXTURECLASSENUM", "description": "One of the 12 soil texture classes use to describe soil texture based on the relative proportion of different grain sizes of mineral particles [sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um)] in a soil" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. 
This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). 
This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. 
identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "water_frequency": { "description": "Number of water delivery events within a given period of time", "type": "string" }, "water_pH": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_adjac": { "description": "Description of the environmental features that are adjacent to the farm water source. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "water_source_shared": { "description": "Other users sharing access to the same water source. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene", "biotic_regm", "chem_administration", "food_product_type", "samp_type", "IFSAC_category" ], "title": "MimarksCFoodFarmEnvironment", "type": "object" }, "MimarksCFoodFoodProductionFacility": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the FoodFoodProductionFacility Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). 
Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products). An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "area_samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide_used": { "description": "Substance intended for preventing, neutralizing, destroying, repelling, or mitigating the effects of any pest or microorganism; that inhibits the growth, reproduction, and activity of organisms, including fungal cells; decreases the number of fungi or pests present; deters microbial growth and degradation of other ingredients in the formulation. Indicate the biocide used on the location where the sample was taken. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. 
In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. 
This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_monitoring_zone": { "description": "An environmental monitoring zone is a formal designation as part of an environmental monitoring program, in which areas of a food production facility are categorized, commonly as zones 1-4, based on likelihood or risk of foodborne pathogen contamination. This field accepts terms listed under food production environmental monitoring zone (http://purl.obolibrary.org/obo/ENVO). 
Please add a term to indicate the environmental monitoring zone the sample was taken from", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "facility_type": { "description": "Establishment details about the type of facility where the sample was taken. This is independent of the specific product(s) within the facility", "items": { "$ref": "#/$defs/FACILITYTYPEENUM" }, "type": "array" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. 
This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. 
This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). 
If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, free-range, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. 
This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. Unless used on infant formula, this date is not a reflection of safety; it is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hygienic_area": { "description": "The subdivision of areas within a food production facility according to hygienic requirements. This field accepts terms listed under hygienic food production area (http://purl.obolibrary.org/obo/ENVO). Please add a term that most accurately indicates the hygienic area your sample was taken from according to the definitions provided", "type": "string" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": "string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. 
The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free. Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_loc_condition": { "$ref": "#/$defs/SAMPLOCCONDITIONENUM", "description": "The condition of the sample location at the time of sampling" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. 
INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. 
This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_surf_moisture": { "description": "Degree of water held on a sampled surface. If present, user can state the degree of water held on surface (intermittent moisture, submerged). 
If no surface moisture is present indicate not present", "items": { "$ref": "#/$defs/SAMPSURFMOISTUREENUM" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spec_intended_cons": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "ster_meth_samp_room": { "description": "The method used to sterilize the sampling room. This field accepts terms listed under electromagnetic radiation (http://purl.obolibrary.org/obo/ENVO_01001026). If the proper descriptor is not listed, please use text to describe the sampling room sterilization method. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene", "coll_site_geo_feat", "samp_source_mat_cat", "samp_type", "samp_stor_media", "samp_stor_device", "food_product_type", "IFSAC_category", "food_product_qual", "food_contact_surf" ], "title": "MimarksCFoodFoodProductionFacility", "type": "object" }, "MimarksCFoodHumanFoods": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the FoodHumanFoods Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products).
An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. 
A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection.
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). 
We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner.
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "ferm_chem_add": { "description": "Any chemicals that are added to the fermentation process to achieve the desired final product", "items": { "type": "string" }, "type": "array" }, "ferm_chem_add_perc": { "description": "The amount of chemical added to the fermentation process", "items": { "type": "number" }, "type": "array" }, "ferm_headspace_oxy": { "description": "The amount of headspace oxygen in a fermentation vessel", "type": "number" }, "ferm_medium": { "description": "The growth medium used for the fermented food fermentation process, which supplies the required nutrients. Usually this includes a carbon and nitrogen source, water, micronutrients and chemical additives", "type": "string" }, "ferm_pH": { "description": "The pH of the fermented food fermentation process", "type": "number" }, "ferm_rel_humidity": { "description": "The relative humidity of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_temp": { "description": "The temperature of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_time": { "description": "The time duration of the fermented food fermentation process", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "ferm_vessel": { "description": "The type of vessel used for containment of the fermentation", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. 
This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. 
This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). 
Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. 
This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required.
The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g.
specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "microb_start": { "description": "Any type of microorganisms used in food production. This field accepts terms listed under live organisms for food production (http://purl.obolibrary.org/obo/FOODON_0344453)", "type": "string" }, "microb_start_count": { "description": "Total cell count of starter culture per gram, volume or area of sample and the method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example : total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "microb_start_inoc": { "description": "The amount of starter culture used to inoculate a new batch", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "microb_start_prep": { "description": "Information about the protocol or method used to prepare the starter inoculum", "type": "string" }, "microb_start_source": { "description": "The source from which the microbial starter culture was sourced. If commercially supplied, list supplier", "type": "string" }, "microb_start_taxID": { "description": "Please include Genus species and strain ID, if known of microorganisms used in food production. For complex communities, pipes can be used to separate two or more microbes", "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": "string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO).
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. 
Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. 
Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. 
identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene", "coll_site_geo_feat", "food_product_type", "IFSAC_category" ], "title": "MimarksCFoodHumanFoods", "type": "object" }, "MimarksCHostAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the HostAssociated Extension", "properties": { "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "blood_press_diast": { "description": "Resting diastolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "blood_press_syst": { "description": "Resting systolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. 
A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. 
ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). 
Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, meditteranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of 
sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. 
While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. 
It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. 
ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene" ], "title": "MimarksCHostAssociated", "type": "object" }, "MimarksCHumanAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the HumanAssociated Extension", "properties": { "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. 
It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amniotic_fluid_color": { "description": "Specification of the color of the amniotic fluid sample", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "blood_blood_disord": { "description": "History of blood disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, hematopoietic system disease (https://disease-ontology.org/?id=DOID:74)", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. 
A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diet_last_six_month": { "description": "Specification of major diet changes in the last six months, if yes the change should be specified", "type": "string" }, "drug_usage": { "description": "Any drug used by subject and the frequency of usage; can include multiple drugs used", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. 
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. 
https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "foetal_health_stat": { "description": "Specification of foetal health status, should also include abortion", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gestation_state": { "description": "Specification of the gestation state", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_hiv_stat": { "description": "HIV status of subject, if yes HAART initiation status should also be indicated as [YES or NO]", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "kidney_disord": { "description": "History of kidney disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, kidney disease (https://disease-ontology.org/?id=DOID:557)", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "maternal_health_stat": { "description": "Specification of the maternal health status", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_throat_disord": { "description": "History of nose-throat disorders; can include multiple disorders, The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850), upper respiratory 
tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. 
The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pet_farm_animal": { "description": "Specification of presence of pets or farm animals in the environment of subject, if yes the animals should be specified; can include multiple animals present", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "pulmonary_disord": { "description": "History of pulmonary disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850)", "items": { "type": "string" }, "type": "array" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "smoker": { "description": "Specification of smoking status", "type": "boolean" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "study_complt_stat": { "description": "Specification of study completion status, if no the reason should be specified", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "travel_out_six_month": { "description": "Specification of the countries travelled in the last six months; can include multiple travels", "items": { "type": "string" }, "type": "array" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "twin_sibling": { "description": "Specification of twin sibling presence", "type": "boolean" }, "urine_collect_meth": { "$ref": "#/$defs/URINECOLLECTMETHENUM", "description": "Specification of urine collection method" }, "urogenit_tract_disor": { "description": "History of urogenital tract disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" }, "weight_loss_3_month": { "description": "Specification of weight loss in the last three months, if yes should be further specified to include amount of weight loss", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene" ], "title": "MimarksCHumanAssociated", "type": "object" }, "MimarksCHumanGut": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the HumanGut Extension", "properties": { "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . 
Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "gastrointest_disord": { "description": "History of gastrointestinal tract disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, gastrointestinal system disease (https://disease-ontology.org/?id=DOID:77)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "liver_disord": { "description": "History of liver disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, liver disease (https://disease-ontology.org/?id=DOID:409)", "items": { "type": "string" }, "type": "array" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "special_diet": { "description": "Specification of special diet; can include multiple special diets", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene" ], "title": "MimarksCHumanGut", "type": "object" }, "MimarksCHumanOral": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the HumanOral Extension", "properties": { "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. 
It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_mouth_teeth_throat_disord": { "description": "History of nose/mouth/teeth/throat disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, nose disease (https://disease-ontology.org/?id=DOID:2825), mouth disease (https://disease-ontology.org/?id=DOID:403), tooth disease (https://disease-ontology.org/?id=DOID:1091), or upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": 
"Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. 
While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_last_toothbrush": { "description": "Specification of the time since last toothbrushing", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene" ], "title": "MimarksCHumanOral", "type": "object" }, "MimarksCHumanSkin": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the HumanSkin Extension", "properties": { "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dermatology_disord": { "description": "History of dermatology disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, skin disease (https://disease-ontology.org/?id=DOID:37)", "items": { "type": "string" }, "type": "array" }, "dominant_hand": { "$ref": "#/$defs/DOMINANTHANDENUM", "description": "Dominant hand of the subject" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . 
Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. 
could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_since_last_wash": { "description": "Specification of the time since last wash", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene" ], "title": "MimarksCHumanSkin", "type": "object" }, "MimarksCHumanVaginal": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the HumanVaginal Extension", "properties": { "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "birth_control": { "description": "Specification of birth control medication used", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "douche": { "description": "Date of most recent douche", "format": "date-time", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gynecologic_disord": { "description": "History of gynecological disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, female reproductive system disease (https://disease-ontology.org/?id=DOID:229)", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hrt": { "description": "Whether subject had hormone replacement therapy, and if yes start date", "format": "date-time", "type": "string" }, "hysterectomy": { "description": "Specification of whether hysterectomy was performed", "type": "boolean" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "menarche": { "description": "Date of most recent menstruation", "format": "date-time", "type": "string" }, "menopause": { "description": "Date of onset of menopause", "format": "date-time", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pregnancy": { "description": "Date due of pregnancy", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. 
More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sexual_act": { "description": "Current sexual partner and frequency of sex", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "urogenit_disord": { "description": "History of urogenital disorders, can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, reproductive system disease (https://disease-ontology.org/?id=DOID:15) or urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene" ], "title": "MimarksCHumanVaginal", "type": "object" }, "MimarksCHydrocarbonResourcesCores": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the HydrocarbonResourcesCores Extension", "properties": { "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. 
Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. Mic)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner.
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. 
Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. 
(Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "owc_tvdss": { "description": "Depth of the original oil water contact (OWC) zone (average) (m TVDSS)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "permeability": { "description": "Measure of the ability of a hydrocarbon resource to allow fluids to pass through it.
(Additional information: https://en.wikipedia.org/wiki/Permeability_(earth_sciences))", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "reservoir": { "description": "Name of the reservoir (e.g.
Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_md": { "description": "In non deviated well, measured depth is equal to the true vertical depth, TVD (TVD=TVDSS plus the reference or datum it refers to). In deviated wells, the MD is the length of trajectory of the borehole measured from the same reference or datum. Common datums used are ground level (GL), drilling rig floor (DF), rotary table (RT), kelly bushing (KB) and mean sea level (MSL). If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_tvdss": { "description": "Depth of the sample i.e. The vertical distance between the sea level and the sampled position in the subsurface. Depth can be reported as an interval for subsurface samples e.g. 1325.75-1362.25 m", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained.
For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sr_dep_env": { "$ref": "#/$defs/SRDEPENVENUM", "description": "Source rock depositional environment (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of source rock (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_kerog_type": { "$ref": "#/$defs/SRKEROGTYPEENUM", "description": "Origin of kerogen. Type I: Algal (aquatic), Type II: planktonic and soft plant material (aquatic or terrestrial), Type III: terrestrial woody/ fibrous plant material (terrestrial), Type IV: oxidized recycled woody debris (terrestrial) (additional information: https://en.wikipedia.org/wiki/Kerogen). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_lithology": { "$ref": "#/$defs/SRLITHOLOGYENUM", "description": "Lithology of source rock (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 
1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "isol_growth_condt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene", "hcr", "hc_produced", "basin", "hcr_temp", "sulfate_fw", "vfa_fw", "samp_type", "api" ], "title": "MimarksCHydrocarbonResourcesCores", "type": "object" }, "MimarksCHydrocarbonResourcesFluidsSwabs": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the HydrocarbonResourcesFluidsSwabs Extension", "properties": { "add_recov_method": { "description": "Additional (i.e. Secondary, tertiary, etc.) recovery methods deployed for increase of hydrocarbon recovery from resource and start date for each one of them. If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide": { "description": "List of biocides (commercial name of product and supplier) and date of administration", "type": "string" }, "biocide_admin_method": { "description": "Method of biocide administration (dose, frequency, duration, time elapsed between last biociding and sampling) (e.g. 150 mg/l; weekly; 4 hr; 3 days)", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_treat_method": { "description": "Method of chemical administration(dose, frequency, duration, time elapsed between administration and sampling) (e.g. 
50 mg/l; twice a week; 1 hr; 0 days)", "type": "string" }, "chem_treatment": { "description": "List of chemical compounds administered upstream the sampling location where sampling occurred (e.g. Glycols, H2S scavenger, corrosion and scale inhibitors, demulsifiers, and other production chemicals etc.). The commercial name of the product and name of the supplier should be provided. The date of administration should also be included", "type": "string" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. MIC)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field.
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "iw_bt_date_well": { "description": "Injection water breakthrough date per well following a secondary and/or tertiary recovery", "format": "date-time", "type": "string" }, "iwf": { "description": "Proportion of the produced fluids derived from injected water at the time of sampling. (e.g. 87%)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. (Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_rate": { "description": "Oil and/or gas production rates per well (e.g. 524 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_start_date": { "description": "Date of field's first production", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_point": { "$ref": "#/$defs/SAMPCOLLECTPOINTENUM", "description": "Sampling point on the asset where the sample was collected (e.g. Wellhead, storage tank, separator, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_loc_corr_rate": { "description": "Metal corrosion rate is the speed of metal deterioration due to environmental conditions. As environmental conditions change corrosion rates change accordingly. Therefore, long term corrosion rates are generally more informative than short term rates and for that reason they are preferred during reporting.
In the case of suspected MIC, corrosion rate measurements at the time of sampling might provide insights into the involvement of certain microbial community members in MIC as well as potential microbial interplays", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_preserv": { "description": "Preservative added to the sample (e.g. Rnalater, alcohol, formaldehyde, etc.). Where appropriate include volume added (e.g. Rnalater; 2 ml)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material.
This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). 
Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. 
identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "water_cut": { "description": "Current amount of water (%) in a produced fluid stream; or the average of the combined streams", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_prod_rate": { "description": "Water production rates per well (e.g. 987 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. 
This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "isol_growth_condt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene", "hcr", "hc_produced", "basin", "water_cut", "iwf", "add_recov_method", "samp_type", "samp_collect_point", "sulfate", "sulfide", "nitrate", "api" ], "title": "MimarksCHydrocarbonResourcesFluidsSwabs", "type": "object" }, "MimarksCMicrobialMatBiofilm": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the MicrobialMatBiofilm Extension", "properties": { "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. 
In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. 
However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene" ], "title": "MimarksCMicrobialMatBiofilm", "type": "object" }, "MimarksCMiscellaneousNaturalOrArtificialEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the MiscellaneousNaturalOrArtificialEnvironment Extension", "properties": { "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483).
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample.
This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms).
Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. 
Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene" ], "title": "MimarksCMiscellaneousNaturalOrArtificialEnvironment", "type": "object" }, "MimarksCPlantAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the PlantAssociated Extension", "properties": { "air_temp_regm": { "description": "Information about treatment involving an exposure to varying temperatures; should include the temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include different temperature regimens", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. 
A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "antibiotic_regm": { "description": "Information about treatment involving antibiotic administration; should include the name of antibiotic, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple antibiotic regimens", "items": { "type": "string" }, "type": "array" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_mutagen": { "description": "Treatment involving use of mutagens; should include the name of mutagen, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mutagen regimens", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cult_root_med": { "description": "Name or reference for the hydroponic or in vitro culture rooting medium; can be the name of a commonly used medium or reference to a specific medium, e.g. Murashige and Skoog medium. 
If the medium has not been formally published, use the rooting medium descriptors", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g.
UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? 
Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "fertilizer_regm": { "description": "Information about treatment involving the use of fertilizers; should include the name of fertilizer, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fertilizer regimens", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravity": { "description": "Information about treatment involving use of gravity factor to study various types of responses in presence, absence or modified levels of gravity; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple treatments", "items": { "type": "string" }, "type": "array" }, "growth_facil": { "description": "Type of facility where the sampled plant was grown; controlled vocabulary: growth chamber, open top chamber, glasshouse, experimental garden, field. Alternatively use Crop Ontology (CO) terms, see http://www.cropontology.org/ontology/CO_715/Crop%20Research", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_hormone_regm": { "description": "Information about treatment involving use of growth hormones; should include the name of growth hormone, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple growth hormone regimens", "items": { "type": "string" }, "type": "array" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens",
"items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_wet_mass": { "description": "Measurement of wet mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity_regm": { "description": "Information about treatment involving an exposure to varying degree of humidity; information about treatment involving use of growth hormones; should include amount of humidity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "light_regm": { "description": "Information about treatment(s) involving exposure to light, including both light intensity and quality", "type": "string" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "mineral_nutr_regm": { "description": "Information about treatment involving the use of mineral supplements; should include the name of mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of pH of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_sex": { "$ref": "#/$defs/PLANTSEXENUM", "description": "Sex of the reproductive parts on the whole plant, e.g. pistillate, staminate, monoecious, hermaphrodite" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. 
petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "radiation_regm": { "description": "Information about treatment involving exposure of plant or a plant part to a particular radiation regimen; should include the radiation type, amount or intensity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple radiation regimens", "items": { "type": "string" }, "type": "array" }, "rainfall_regm": { "description": "Information about treatment involving an exposure to a given amount of rainfall, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N, P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "root_med_regl": { "description": "Growth regulators in the culture rooting medium such as cytokinins, auxins, gibberellins, abscisic acid; e.g. 0.5 mg/L NAA", "type": "string" }, "root_med_solid": { "description": "Specification of the solidifying agent in the culture rooting medium; e.g. agar", "type": "string" }, "root_med_suppl": { "description": "Organic supplements of the culture rooting medium, such as vitamins, amino acids, organic acids, antibiotics, activated charcoal; e.g. nicotinic acid (0.5 mg/L)", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. 
inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. 
May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). 
Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tiss_cult_growth_med": { "description": "Description of plant tissue culture growth media used", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene" ], "title": "MimarksCPlantAssociated", "type": "object" }, "MimarksCSediment": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the Sediment Extension", "properties": { "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. 
In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. 
ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided.
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "particle_class": { "description": "Particles are classified, based on their size, into six general categories:clay, silt, sand, gravel, cobbles, and boulders; should include amount of particle preceded by the name of the particle type; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. 
It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. Maybe be a single taxon or mixed taxa sample. Use 'synthetic metagenome for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sediment_type": { "$ref": "#/$defs/SEDIMENTTYPEENUM", "description": "Information about the sediment type based on major constituents" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. 
identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene" ], "title": "MimarksCSediment", "type": "object" }, "MimarksCSoil": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the Soil Extension", "properties": { "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "al_sat": { "description": "Aluminum saturation (esp. For tropical soils)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "al_sat_meth": { "description": "Reference or method used in determining Al saturation", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. 
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner.
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals": { "description": "Heavy metals present in the sequenced sample and their concentrations. 
For multiple heavy metals and concentrations, add multiple copies of this field", "items": { "type": "string" }, "type": "array" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "link_addit_analys": { "description": "Link to additional analysis results performed on the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. 
This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe?
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. Maybe be a single taxon or mixed taxa sample. 
Use 'synthetic metagenome for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. 
While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. 
Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "water_cont_soil_meth": { "description": "Reference or method used in determining the water content of soil", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "elev", "env_broad_scale", "target_gene" ], "title": "MimarksCSoil", "type": "object" }, "MimarksCSymbiontAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the SymbiontAssociated Extension", "properties": { "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "association_duration": { "description": "Time spent in host of the symbiotic organism at the time of sampling; relevant scale depends on symbiotic organism and study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_cellular_loc": { "$ref": "#/$defs/HOSTCELLULARLOCENUM", "description": "The localization of the symbiotic host organism within the host from which it was sampled: e.g. 
intracellular if the symbiotic host organism is localized within the cells or extracellular if the symbiotic host organism is localized outside of cells" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_dependence": { "$ref": "#/$defs/HOSTDEPENDENCEENUM", "description": "Type of host dependence for the symbiotic host organism to its host" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_infra_spec_name": { "description": "Taxonomic information about the host below subspecies level", "type": "string" }, "host_infra_spec_rank": { "description": "Taxonomic rank information about the host below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_number": { "description": "Number of symbiotic host individuals pooled at the time of collection", "type": "string" }, "host_of_host_coinf": { "description": "The taxonomic name of any coinfecting organism observed in a symbiotic relationship with the host of the sampled host organism. e.g. where a sample collected from a host trematode species (A) which was collected from a host_of_host fish (B) that was also infected with a nematode (C), the value here would be (C) the nematode {species name} or {common name}. Multiple co-infecting species may be added in a comma-separated list. For listing symbiotic organisms associated with the host (A) use the term Observed host symbiont", "type": "string" }, "host_of_host_disease": { "description": "List of diseases with which the host of the symbiotic host organism has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_loc": { "description": "For a symbiotic host organism the local anatomical environment within its host may have causal influences. Report the anatomical entity(s) which are in the direct environment of the symbiotic host organism being sampled and which you believe have significant causal influences on your sample or specimen. 
For example, if the symbiotic host organism being sampled is an intestinal worm, its local environmental context will be the term for intestine from UBERON (http://uberon.github.io/)", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_med": { "description": "Report the environmental material(s) immediately surrounding the symbiotic host organism at the time of sampling. This usually will be a tissue or substance type from the host, but may be another material if the symbiont is external to the host. We recommend using classes from the UBERON ontology, but subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483) may also be used. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
intestines, heart)", "type": "string" }, "host_of_host_fam_rel": { "description": "Familial relationship of the host of the symbiotic host organisms to other hosts of symbiotic host organism in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_of_host_geno": { "description": "Observed genotype of the host of the symbiotic host organism", "type": "string" }, "host_of_host_gravid": { "description": "Whether or not the host of the symbiotic host organism is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_of_host_infname": { "description": "Taxonomic name information of the host of the symbiotic host organism below subspecies level", "type": "string" }, "host_of_host_infrank": { "description": "Taxonomic rank information about the host of the symbiotic host organism below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_of_host_name": { "description": "Common name of the host of the symbiotic host organism", "type": "string" }, "host_of_host_pheno": { "description": "Phenotype of the host of the symbiotic host organism. For phenotypic quality ontology (PATO) terms, see http://purl.bioontology.org/ontology/pato", "type": "string" }, "host_of_host_sub_id": { "description": "A unique identifier by which each host of the symbiotic host organism subject can be referred to, de-identified, e.g. #H14", "type": "string" }, "host_of_host_taxid": { "description": "NCBI taxon id of the host of the symbiotic host organism", "type": "string" }, "host_of_host_totmass": { "description": "Total mass of the host of the symbiotic host organism at collection, the unit depends on the host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_specificity": { "$ref": "#/$defs/HOSTSPECIFICITYENUM", "description": "Level of specificity of symbiont-host interaction: e.g. generalist (symbiont able to establish associations with distantly related hosts) or species-specific" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in the WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "mode_transmission": { "$ref": "#/$defs/MODETRANSMISSIONENUM", "description": "The process through which the symbiotic host organism entered the host from which it was sampled" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "route_transmission": { "$ref": "#/$defs/ROUTETRANSMISSIONENUM", "description": "Description of path taken by the symbiotic host organism being sampled in order to establish a symbiotic relationship with the host (with which it was observed at the time of sampling) via a mode of transmission (specified in mode_transmission)" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from the environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored, written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_sol": { "description": "Solution within which sample was stored, if any", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. 
by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sym_life_cycle_type": { "$ref": "#/$defs/SYMLIFECYCLETYPEENUM", "description": "Type of life cycle of the symbiotic host species (the thing being sampled). 
Simple life cycles occur within a single host, complex ones within multiple different hosts over the course of their normal life cycle" }, "symbiont_host_role": { "$ref": "#/$defs/SYMBIONTHOSTROLEENUM", "description": "Role of the host in the life cycle of the symbiotic organism" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "type_of_symbiosis": { "$ref": "#/$defs/TYPEOFSYMBIOSISENUM", "description": "Type of biological interaction established between the symbiotic host organism being sampled and its respective host" }, "urobiom_sex": { "$ref": "#/$defs/UROBIOMSEXENUM", "description": "Physical sex of the host" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene", "host_dependence", "sym_life_cycle_type", "host_life_stage" ], "title": "MimarksCSymbiontAssociated", "type": "object" }, "MimarksCWastewaterSludge": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the WastewaterSludge Extension", "properties": { "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": 
"^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biochem_oxygen_dem": { "description": "Amount of dissolved oxygen needed by aerobic biological organisms in a body of water to break down organic material present in a given water sample at certain temperature over a specific time period", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_oxygen_dem": { "description": "A measure of the capacity of water to consume oxygen during the decomposition of organic matter and the oxidation of inorganic chemicals such as ammonia and nitrite", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "efficiency_percent": { "description": "Percentage of volatile solids removed from the anaerobic digestor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "emulsions": { "description": "Amount or concentration of substances such as paints, adhesives, mayonnaise, hair colorants, emulsified oils, etc.; can include multiple emulsion types", "items": { "type": "string" }, "type": "array" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field.
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "gaseous_substances": { "description": "Amount or concentration of substances such as hydrogen sulfide, carbon dioxide, methane, etc.; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "indust_eff_percent": { "description": "Percentage of industrial effluents received by wastewater treatment plant", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "inorg_particles": { "description": "Concentration of particles such as sand, grit, metal particles, ceramics, etc.; can include multiple particles", "items": { "type": "string" }, "type": "array" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "org_particles": { "description": "Concentration of particles such as faeces, hairs, food, vomit, paper fibers, plant material, humus, etc", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. 
This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pre_treatment": { "description": "The process of pre-treatment removes materials that can be easily collected from the raw wastewater", "type": "string" }, "primary_treatment": { "description": "The process to produce both a generally homogeneous liquid capable of being treated biologically and a sludge that can be separately treated or processed", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reactor_type": { "description": "Anaerobic digesters can be designed and engineered to operate using a number of different process configurations, as batch or continuous, mesophilic, high solid or low solid, and single stage or multistage", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is 
this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible.
INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "secondary_treatment": { "description": "The process for substantially degrading the biological content of the sewage", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA, ENA or DRA" }, "sewage_type": { "description": "Type of wastewater treatment plant as municipal or industrial", "type": "string" }, "sludge_retent_time": { "description": "The time activated sludge remains in reactor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_inorg_mat": { "description": "Concentration of substances such as ammonia, road-salt, sea-salt, cyanide, hydrogen sulfide, thiocyanates, thiosulfates, etc", "items": { "type": "string" }, "type": "array" }, "soluble_org_mat": { "description": "Concentration of substances such as urea, fruit sugars, soluble proteins, drugs, pharmaceuticals, etc", "items": { "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tertiary_treatment": { "description": "The process providing a final treatment stage to raise the effluent quality before it is discharged to the receiving environment", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" }, "wastewater_type": { "description": "The origin of wastewater such as human waste, rainfall, storm drains, etc", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene" ], "title": "MimarksCWastewaterSludge", "type": "object" }, "MimarksCWater": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksC checklist and the Water Extension", "properties": { "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "atmospheric_data": { "description": "Measurement of atmospheric data; can include multiple data", "items": { "type": "string" }, "type": "array" }, "bac_prod": { "description": "Bacterial production in the water column measured by isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bac_resp": { "description": "Measurement of bacterial respiration in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_nitro": { "description": "Concentration of dissolved inorganic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "down_par": { "description": "Visible waveband radiance and irradiance measurements in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extrachrom_elements": { "description": "Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)", "type": "integer" }, "fluor": { "description": "Raw or converted fluorescence of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "light_intensity": { "description": "Measurement of light intensity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "part_org_nitro": { "description": "Concentration of particulate organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "photon_flux": { "description": "Measurement of photon flux", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "primary_prod": { "description": "Measurement of primary production, generally measured as isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). 
Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_react_phosp": { "description": "Concentration of soluble reactive phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_part_matter": { "description": "Concentration of suspended particulate matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_diss_nitro": { "description": "Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_inorg_nitro": { "description": "Total inorganic nitrogen content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_part_carb": { "description": "Total particulate carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "isol_growth_condt", "project_name", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "target_gene" ], "title": "MimarksCWater", "type": "object" }, "MimarksSAgriculture": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the Agriculture Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. 
A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. 
This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; information about treatment involving use of growth hormones; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer 
or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of ph 
of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "photosynt_activ": { "description": "Measurement of photosynthetic activity (i.e. leaf gas exchange / chlorophyll fluorescence emissions / reflectance / transpiration) Please also include the method term detailing the method of activity measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "photosynt_activ_meth": { "description": "Reference or method used in measurement of photosynthetic activity", "items": { "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). 
If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. 
under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. 
Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_location": { "description": "The location the sequencing run was performed. 
Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of some soil.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_pH": { "description": "pH of some soil.", "type": "number" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "water_pH": { "description": "The pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_shared": { "description": "Other users sharing access to the same water source. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "pcr_primers", "lib_screen", "nucl_acid_amp", "target_subfragment", "mid", "temp", "nucl_acid_ext", "samp_size", "lib_reads_seqd", "samp_collect_device", "env_broad_scale", "project_name", "lib_vector", "adapters", "chimera_check", "env_local_scale", "samp_mat_process", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "pcr_cond", "target_gene", "chem_administration", "food_source", "specific_host", "pathogenicity", "samp_store_dur", "samp_store_loc", "host_age", "host_common_name", "host_genotype", "host_height", "host_length", "host_life_stage", "host_phenotype", "host_taxid", "host_tot_mass", "host_spec_range", "assembly_name", "soil_type", "soil_type_meth", "store_cond", "microbial_biomass", "micro_biomass_meth", "sieving", "pool_dna_extracts" ], "title": "MimarksSAgriculture", "type": "object" }, "MimarksSAir": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the Air Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "barometric_press": { "description": "Force per unit area exerted against a surface by the weight of air above that surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_monoxide": { "description": "Carbon monoxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "oxygen": { "description": "Oxygen (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pollutants": { "description": "Pollutant types and, amount or concentrations measured at the time of sampling; can report multiple pollutants by entering numeric values preceded by name of pollutant", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. 
More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. 
ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "volatile_org_comp": { "description": "Concentration of carbon-based chemicals that easily evaporate at room temperature; can report multiple volatile organic compounds by entering numeric values preceded by name of compound", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "alt", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene" ], "title": "MimarksSAir", "type": "object" }, "MimarksSBuiltEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the BuiltEnvironment Extension", "properties": { "abs_air_humidity": { "description": "Actual mass of water vapor - mh20 - present in the air water vapor mixture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "address": { "description": "The street name and building number where the sampling occurred", "type": "string" }, "adj_room": { "description": "List of rooms (room number, room name) immediately adjacent to the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "aero_struc": { "$ref": "#/$defs/AEROSTRUCENUM", "description": "Aerospace structures typically consist of thin plates with stiffeners for the external surfaces, bulkheads and frames to support the shape and fasteners such as welds, rivets, screws and bolts to hold the components together" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. 
It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amount_light": { "description": "The unit of illuminance and luminous emittance, measuring luminous flux per unit area", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "arch_struc": { "$ref": "#/$defs/ARCHSTRUCENUM", "description": "An architectural structure is a human-made, free-standing, immobile outdoor construction" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_dew_point": { "description": "The average of dew point measures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "avg_temp": { "description": "The average of temperatures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bathroom_count": { "description": "The number of bathrooms in the building", "type": "integer" }, "bedroom_count": { "description": "The number of bedrooms in the building", "type": "integer" }, "build_docs": { "$ref": "#/$defs/BUILDDOCSENUM", "description": "The building design, construction and operation documents" }, "build_occup_type": { "description": "The primary function for which a building or discrete part of a building is intended to be used", "items": { "$ref": "#/$defs/BUILDOCCUPTYPEENUM" }, "type": "array" }, "building_setting": { "$ref": "#/$defs/BUILDINGSETTINGENUM", "description": "A location (geography) where a building is set" }, "built_struc_age": { "description": "The age of the built structure since construction", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "built_struc_set": { "$ref": "#/$defs/BUILTSTRUCSETENUM", "description": "The characterization of the location of the built structure as high or low human density" }, "built_struc_type": { "description": "A physical structure that is a body or assemblage of bodies in space to form a system capable of supporting loads", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_area": { "description": "The area of the ceiling space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the ceiling at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "ceil_finish_mat": { "$ref": "#/$defs/CEILFINISHMATENUM", "description": "The type of material used to finish a ceiling" }, "ceil_struc": { "$ref": "#/$defs/CEILSTRUCENUM", "description": "The construction format of the ceiling" }, "ceil_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a ceiling surface" }, "ceil_thermal_mass": { "description": "The ability of the ceiling to provide inertia against temperature fluctuations. Generally this means concrete that is exposed. A metal deck that supports a concrete slab will act thermally as long as it is exposed to room air flow", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_type": { "$ref": "#/$defs/CEILTYPEENUM", "description": "The type of ceiling according to the ceiling's appearance or construction" }, "ceil_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the ceiling" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cool_syst_id": { "description": "The cooling system identifier", "type": "integer" }, "date_last_rain": { "description": "The date of the last time it rained", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dew_point": { "description": "The temperature to which a given parcel of humid air must be cooled, at constant barometric pressure, for water vapor to condense into water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_comp_type": { "$ref": "#/$defs/DOORCOMPTYPEENUM", "description": "The composite type of the door" }, "door_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the door" }, "door_direct": { "$ref": "#/$defs/DOORDIRECTENUM", "description": "The direction the door opens" }, "door_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the door in the room" }, "door_mat": { "$ref": "#/$defs/DOORMATENUM", "description": "The material the door is composed of" }, "door_move": { "$ref": "#/$defs/DOORMOVEENUM", "description": "The type of movement of the door" }, "door_size": { "description": "The size of the door", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_type": { "$ref": "#/$defs/DOORTYPEENUM", "description": "The type of door material" }, "door_type_metal": { "$ref": "#/$defs/DOORTYPEMETALENUM", "description": "The type of metal door" }, "door_type_wood": { "description": "The type of wood door", "type": "string" }, "door_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a door" }, "drawings": { "$ref": "#/$defs/DRAWINGSENUM", "description": "The building's architectural drawings; if design is chosen, indicate phase: conceptual, schematic, design development, and construction documents" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elevator": { "description": "The number of elevators within the built structure", "type": "integer" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "escalator": { "description": "The number of escalators within the built structure", "type": "integer" }, "exp_duct": { "description": "The amount of exposed ductwork in the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "exp_pipe": { "description": "The number of exposed pipes in the room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "ext_door": { "description": "The number of exterior doors in the built structure", "type": "integer" }, "ext_wall_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The orientation of the exterior wall" }, "ext_window_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The compass direction the exterior window of the room is facing" }, "filter_type": { "description": "A device which removes solid particulates or airborne molecular contaminants", "items": { "$ref": "#/$defs/FILTERTYPEENUM" }, "type": "array" }, "fireplace_type": { "$ref": "#/$defs/FIREPLACETYPEENUM", "description": "A firebox with chimney" }, "floor_age": { "description": "The time period since installment of the carpet or flooring", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_area": { "description": "The area of the floor space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the floor at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "floor_count": { "description": "The number of floors in the building, including basements and mechanical penthouse", "type": "integer" }, "floor_finish_mat": { "description": "The floor covering type; the finished surface that is walked on", "type": "string" }, "floor_struc": { "$ref": "#/$defs/FLOORSTRUCENUM", "description": "Refers to the structural elements and subfloor upon which the finish flooring is installed" }, "floor_thermal_mass": { "description": "The ability of the floor to provide inertia against temperature fluctuations", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_water_mold": { "$ref": "#/$defs/FLOORWATERMOLDENUM", "description": "Signs of the presence of mold or mildew in a room" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "freq_cook": { "description": "The number of times a meal is cooked per week", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "furniture": { "$ref": "#/$defs/FURNITUREENUM", "description": "The types of furniture present in the sampled room" }, "gender_restroom": { "$ref": "#/$defs/GENDERRESTROOMENUM", "description": "The gender type of the restroom" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hall_count": { "description": "The total count of hallways and corridors in the built structure", "type": "integer" }, "handidness": { "$ref": "#/$defs/HANDIDNESSENUM", "description": "The handedness of the individual sampled" }, "heat_cool_type": { "description": "Methods of conditioning or heating a room or building", "items": { "$ref": "#/$defs/HEATCOOLTYPEENUM" }, "type": "array" }, "heat_deliv_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The location of heat delivery within the room" }, "heat_sys_deliv_meth": { "$ref": "#/$defs/HEATSYSDELIVMETHENUM", "description": "The method by which the heat is delivered through the system" }, "heat_system_id": { "description": "The heating system identifier", "type": "integer" }, "height_carper_fiber": { "description": "The average carpet fiber height in the indoor environment", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "indoor_space": { "$ref": "#/$defs/INDOORSPACEENUM", "description": "A distinguishable space within a structure, the purpose for which discrete areas of a building are used" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "inside_lux": { "description": "The recorded value at sampling time (power density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "int_wall_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the wall at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "last_clean": { "description": "The last time the floor was cleaned (swept, mopped, vacuumed)", "format": "date-time", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_type": { "description": "Application of light to achieve some practical or aesthetic effect. Lighting includes the use of both artificial light sources such as lamps and light fixtures, as well as natural illumination by capturing daylight. 
Can also include absence of light", "items": { "$ref": "#/$defs/LIGHTTYPEENUM" }, "type": "array" }, "max_occup": { "description": "The maximum amount of people allowed in the indoor environment", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mech_struc": { "$ref": "#/$defs/MECHSTRUCENUM", "description": "mechanical structure: a moving structure" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_pets": { "description": "The number of pets residing in the sampled space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_plants": { "description": "The number of plant(s) in the sampling space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_resident": { "description": "The number of individuals 
currently occupying in the sampling location", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "occup_density_samp": { "description": "Average number of occupants at time of sampling per square footage", "type": "number" }, "occup_document": { "$ref": "#/$defs/OCCUPDOCUMENTENUM", "description": "The type of documentation of occupancy" }, "occup_samp": { "description": "Number of occupants present at time of sample within the given space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. 
The primer sequence should be reported in uppercase letters", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "quad_pos": { "$ref": "#/$defs/QUADPOSENUM", "description": "The quadrant position of the sampling room within the building" }, "rel_air_humidity": { "description": "Partial vapor and air pressure, density of the vapor and air, or by the actual mass of the vapor and air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "rel_humidity_out": { "description": "The recorded outside relative humidity value at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_samp_loc": { "$ref": "#/$defs/RELSAMPLOCENUM", "description": "The sampling location within the train car" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "room_air_exch_rate": { "description": "The rate at which outside air replaces indoor air in a given space", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_architec_elem": { "description": "The unique details and component parts that, together, form the architecture of a distinguishable space within a built structure", "type": "string" }, "room_condt": { "$ref": "#/$defs/ROOMCONDTENUM", "description": "The condition of the room at the time of sampling" }, "room_connected": { "$ref": "#/$defs/ROOMCONNECTEDENUM", "description": "List of rooms connected to the sampling room by a doorway" }, "room_count": { "description": "The total count of rooms in the built structure including all room types", "type": "integer" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "room_door_dist": { "description": "Distance between doors (meters) in the hallway between the sampling room and adjacent rooms", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_door_share": { "description": "List of room(s) (room number, room name) sharing a door with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_hallway": { "description": "List of room(s) (room number, room name) located in the same hallway as sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_loc": { "$ref": "#/$defs/ROOMLOCENUM", "description": "The position of the room within the building" }, "room_moist_dam_hist": { "description": "The history of moisture damage or mold in the past 12 months. Number of events of moisture damage or mold observed", "type": "integer" }, "room_net_area": { "description": "The net floor area of sampling room. 
Net area excludes wall thicknesses", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_occup": { "description": "Count of room occupancy at time of sampling", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_samp_pos": { "$ref": "#/$defs/ROOMSAMPPOSENUM", "description": "The horizontal sampling position in the room relative to architectural elements" }, "room_type": { "description": "The main purpose or activity of the sampling room. A room is any distinguishable space within a structure", "type": "string" }, "room_vol": { "description": "Volume of sampling room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_wall_share": { "description": "List of room(s) (room number, room name) sharing a wall with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_window_count": { "description": "Number of windows in the room", "type": "integer" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_sort_meth": { "description": "Method by which samples are sorted; open face filter collecting total suspended particles, prefilter to remove particles larger than X micrometers in diameter, where common values of X would be 10 and 2.5 full size sorting in a cascade impactor", "items": { "type": "string" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_time_out": { "description": "The recent and long term history of outside sampling", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_weather": { "$ref": "#/$defs/SAMPWEATHERENUM", "description": "The weather on the sampling day" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_use": { "$ref": "#/$defs/SEASONUSEENUM", "description": "The seasons the space is occupied" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "shad_dev_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the shading device" }, "shading_device_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the shading device at the time of sampling" }, "shading_device_loc": { "$ref": "#/$defs/SHADINGDEVICELOCENUM", "description": "The location of the shading device in relation to the built structure" }, "shading_device_mat": { "description": "The material the shading device is composed of", "type": "string" }, "shading_device_type": { "$ref": "#/$defs/SHADINGDEVICETYPEENUM", "description": "The type of shading device" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. 
The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "space_typ_state": { "$ref": "#/$defs/SPACETYPSTATEENUM", "description": "Customary or normal state of the space" }, "specific": { "$ref": "#/$defs/SPECIFICENUM", "description": "The building specifications. If design is chosen, indicate phase: conceptual, schematic, design development, construction documents" }, "specific_humidity": { "description": "The mass of water vapour in a unit mass of moist air, usually expressed as grams of vapour per kilogram of air, or, in air conditioning, as grains per pound", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "substructure_type": { "description": "The substructure or under building is that largely hidden section of the building which is built off the foundations to the ground floor level", "items": { "$ref": "#/$defs/SUBSTRUCTURETYPEENUM" }, "type": "array" }, "surf_air_cont": { "description": "Contaminant identified on surface", "items": { "$ref": "#/$defs/SURFAIRCONTENUM" }, "type": "array" }, "surf_humidity": { "description": "Surfaces: water activity as a function of air and material moisture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "surf_moisture": { "description": "Water held on a surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "surf_moisture_ph": { "description": "ph measurement of surface", "type": "number" }, "surf_temp": { "description": "Temperature of the surface at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp_out": { "description": "The recorded temperature value at sampling time outside", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "train_line": { "$ref": "#/$defs/TRAINLINEENUM", "description": "The subway line name" }, "train_stat_loc": { "$ref": "#/$defs/TRAINSTATLOCENUM", "description": "The train station collection location" }, "train_stop_loc": { "$ref": "#/$defs/TRAINSTOPLOCENUM", "description": "The train stop collection location" }, "typ_occup_density": { "description": "Customary or normal density of occupants", "type": "number" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "vis_media": { "description": "The building visual media", "type": "string" }, "wall_area": { "description": "The total area of the sampled room's walls", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_const_type": { "$ref": "#/$defs/WALLCONSTTYPEENUM", "description": "The building class of the wall defined by the composition of the building elements and fire-resistance rating" }, "wall_finish_mat": { "$ref": "#/$defs/WALLFINISHMATENUM", "description": "The material utilized to finish the outer most layer of the wall" }, "wall_height": { "description": "The average height of the walls in the sampled room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the wall within the room" }, "wall_surf_treatment": { "$ref": "#/$defs/WALLSURFTREATMENTENUM", "description": "The surface treatment of interior wall" }, "wall_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a wall surface" }, "wall_thermal_mass": { "description": "The ability of the wall to provide inertia against temperature fluctuations. 
Generally this means concrete or concrete block that is either exposed or covered only with paint", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a wall" }, "water_feat_size": { "description": "The size of the water feature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_feat_type": { "$ref": "#/$defs/WATERFEATTYPEENUM", "description": "The type of water feature present within the building being sampled" }, "weekday": { "$ref": "#/$defs/WEEKDAYENUM", "description": "The day of the week when sampling occurred" }, "window_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the window at the time of sampling" }, "window_cover": { "$ref": "#/$defs/WINDOWCOVERENUM", "description": "The type of window covering" }, "window_horiz_pos": { "$ref": "#/$defs/WINDOWHORIZPOSENUM", "description": "The horizontal position of the window on the wall" }, "window_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the window within the room" }, "window_mat": { "$ref": "#/$defs/WINDOWMATENUM", "description": "The type of material used to finish a window" }, "window_open_freq": { "description": "The number of times windows are opened per week", "type": "integer" }, "window_size": { "description": "The window's length and width", "type": "string" }, "window_status": { "$ref": "#/$defs/WINDOWSTATUSENUM", "description": "Defines whether the windows were open or closed during environmental testing" }, "window_type": { "$ref": "#/$defs/WINDOWTYPEENUM", "description": "The type of windows" }, "window_vert_pos": { "$ref": "#/$defs/WINDOWVERTPOSENUM", "description": "The vertical position of 
the window on the wall" }, "window_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the window" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene", "surf_material", "rel_air_humidity", "abs_air_humidity", "air_temp", "build_occup_type", "carb_dioxide", "ventilation_type", "organism_count", "indoor_space", "indoor_surf", "filter_type", "heat_cool_type", "building_setting", "light_type", "space_typ_state", "typ_occup_density", "occup_samp", "occup_density_samp" ], "title": "MimarksSBuiltEnvironment", "type": "object" }, "MimarksSFoodAnimalAndAnimalFeed": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the FoodAnimalAndAnimalFeed Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). 
Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products). An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "animal_am": { "description": "The name(s) (generic or brand) of the antimicrobial(s) given to the food animal within the last 30 days", "type": "string" }, "animal_am_dur": { "description": "The duration of time (days) that the antimicrobial was administered to the food animal", "type": "string" }, "animal_am_freq": { "description": "The frequency per day that the antimicrobial was administered to the food animal", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "animal_am_route": { "description": "The route by which the antimicrobial is administered into the body of the food animal", "type": "string" }, "animal_am_use": { "description": "The prescribed intended use of or the condition treated by the antimicrobial given to the food animal by any route of administration", "type": "string" }, "animal_body_cond": { "$ref": "#/$defs/ANIMALBODYCONDENUM", "description": "Body condition scoring is a production management tool used to evaluate overall health and nutritional needs of a food animal. Because there are different scoring systems, this field is restricted to three categories" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes |Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock.
This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_sex": { "$ref": "#/$defs/ANIMALSEXENUM", "description": "The sex and reproductive status of the food animal" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. 
This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result
of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). 
Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g.
air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. 
This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. 
Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). 
Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. 
This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_source_age": { "description": "The age of the food source host organism. Depending on the type of host organism, age may be more appropriate to report in days, weeks, or years", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation.
Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude.
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. 
Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. 
This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. 
Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene", "coll_site_geo_feat", "food_origin", "food_prod", "food_product_type", "IFSAC_category", "intended_consumer", "samp_purpose" ], "title": "MimarksSFoodAnimalAndAnimalFeed", "type": "object" }, "MimarksSFoodFarmEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the FoodFarmEnvironment Extension", "properties": { "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). 
Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "air_flow_impede": { "description": "Presence of objects in the area that would influence or impede air flow through the air filter", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. 
It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes. Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_intrusion": { "description": "Identification of animals intruding on the sample or sample site including invertebrates (such as pests or pollinators) and vertebrates (such as wildlife or domesticated animals). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. 
Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "date_extr_weath": { "description": "Date of unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "format": "date-time", "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extr_weather_event": { "description": "Unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "$ref": "#/$defs/EXTRWEATHEREVENTENUM" }, "type": "array" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_san_freq": { "description": "The number of times farm equipment is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually", "type": "string" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fertilizer_date": { "description": "Date of administration of soil amendment or fertilizer. 
Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "format": "date-time", "type": "string" }, "food_clean_proc": { "$ref": "#/$defs/FOODCLEANPROCENUM", "description": "The process of cleaning food to separate other environmental materials from the food source. Multiple terms can be separated by pipes" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. 
This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality; this date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. 
This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_medium": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). The name of the medium used to grow the microorganism", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product 
within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. 
The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. 
soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_part_maturity": { "description": "A description of the stage of development of a plant or plant part based on maturity or ripeness. This field accepts terms listed under degree of plant maturity (http://purl.obolibrary.org/obo/FOODON_03530050)", "type": "string" }, "plant_reprod_crop": { "description": "Plant reproductive part used in the field during planting to start the crop", "items": { "$ref": "#/$defs/PLANTREPRODCROPENUM" }, "type": "array" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free. Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. 
under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. 
Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. 
For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of soil at time of sampling.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_pH": { "description": "The pH of soil at time of sampling.", "type": "number" }, "soil_porosity": { "description": "Porosity of soil or deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_class": { "$ref": "#/$defs/SOILTEXTURECLASSENUM", "description": "One of the 12 soil texture classes used to describe soil texture based on the relative proportion of different grain sizes of mineral particles [sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um)] in a soil" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. 
identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "water_frequency": { "description": "Number of water delivery events within a given period of time", "type": "string" }, "water_pH": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_adjac": { "description": "Description of the environmental features that are adjacent to the farm water source. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "water_source_shared": { "description": "Other users sharing access to the same water source. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene", "biotic_regm", "chem_administration", "food_product_type", "samp_type", "IFSAC_category" ], "title": "MimarksSFoodFarmEnvironment", "type": "object" }, "MimarksSFoodFoodProductionFacility": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the FoodFoodProductionFacility Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products). 
An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "area_samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_occup": { "description": "Daily average occupancy of room. 
Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide_used": { "description": "Substance intended for preventing, neutralizing, destroying, repelling, or mitigating the effects of any pest or microorganism; that inhibits the growth, reproduction, and activity of organisms, including fungal cells; decreases the number of fungi or pests present; deters microbial growth and degradation of other ingredients in the formulation. Indicate the biocide used on the location where the sample was taken. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. 
In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. 
This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_monitoring_zone": { "description": "An environmental monitoring zone is a formal designation as part of an environmental monitoring program, in which areas of a food production facility are categorized, commonly as zones 1-4, based on likelihood or risk of foodborne pathogen contamination. This field accepts terms listed under food production environmental monitoring zone (http://purl.obolibrary.org/obo/ENVO). 
Please add a term to indicate the environmental monitoring zone the sample was taken from", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "facility_type": { "description": "Establishment details about the type of facility where the sample was taken. This is independent of the specific product(s) within the facility", "items": { "$ref": "#/$defs/FACILITYTYPEENUM" }, "type": "array" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. 
This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. 
http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that are not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. 
This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). 
Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. 
This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hygienic_area": { "description": "The subdivision of areas within a food production facility according to hygienic requirements. This field accepts terms listed under hygienic food production area (http://purl.obolibrary.org/obo/ENVO). 
Please add a term that most accurately indicates the hygienic area your sample was taken from according to the definitions provided", "type": "string" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. 
The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": "string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free. Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_loc_condition": { "$ref": "#/$defs/SAMPLOCCONDITIONENUM", "description": "The condition of the sample location at the time of sampling" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. 
Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. 
If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_surf_moisture": { "description": "Degree of water held on a sampled surface. If present, user can state the degree of water held on surface (intermittent moisture, submerged). If no surface moisture is present indicate not present", "items": { "$ref": "#/$defs/SAMPSURFMOISTUREENUM" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. 
Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spec_intended_cons": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "ster_meth_samp_room": { "description": "The method used to sterilize the sampling room. This field accepts terms listed under electromagnetic radiation (http://purl.obolibrary.org/obo/ENVO_01001026). If the proper descriptor is not listed, please use text to describe the sampling room sterilization method. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene", "coll_site_geo_feat", "samp_source_mat_cat", "samp_type", "samp_stor_media", "samp_stor_device", "food_product_type", "IFSAC_category", "food_product_qual", "food_contact_surf" ], "title": "MimarksSFoodFoodProductionFacility", "type": "object" }, "MimarksSFoodHumanFoods": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the FoodHumanFoods Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", 
"items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). 
Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. 
air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "ferm_chem_add": { "description": "Any chemicals that are added to the fermentation process to achieve the desired final product", "items": { "type": "string" }, "type": "array" }, "ferm_chem_add_perc": { "description": "The amount of chemical added to the fermentation process", "items": { "type": "number" }, "type": "array" }, "ferm_headspace_oxy": { "description": "The amount of headspace oxygen in a fermentation vessel", "type": "number" }, "ferm_medium": { "description": "The growth medium used for the fermented food fermentation process, which supplies the required nutrients. Usually this includes a carbon and nitrogen source, water, micronutrients and chemical additives", "type": "string" }, "ferm_pH": { "description": "The pH of the fermented food fermentation process", "type": "number" }, "ferm_rel_humidity": { "description": "The relative humidity of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_temp": { "description": "The temperature of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_time": { "description": "The time duration of the fermented food fermentation process", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "ferm_vessel": { "description": "The type of vessel used for containment of the fermentation", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. 
This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. 
This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). 
Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. 
This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. 
The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. 
specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready 
libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "microb_start": { "description": "Any type of microorganisms used in food production. This field accepts terms listed under live organisms for food production (http://purl.obolibrary.org/obo/FOODON_0344453)", "type": "string" }, "microb_start_count": { "description": "Total cell count of starter culture per gram, volume or area of sample and the method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example : total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "microb_start_inoc": { "description": "The amount of starter culture used to inoculate a new batch", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "microb_start_prep": { "description": "Information about the protocol or method used to prepare the starter inoculum", "type": "string" }, "microb_start_source": { "description": "The source from which the microbial starter culture was sourced. If commercially supplied, list supplier", "type": "string" }, "microb_start_taxID": { "description": "Please include Genus species and strain ID, if known of microorganisms used in food production. For complex communities, pipes can be used to separate two or more microbes", "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": "string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. 
This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). 
This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene", "coll_site_geo_feat", "food_product_type", "IFSAC_category" ], "title": "MimarksSFoodHumanFoods", "type": "object" }, "MimarksSHostAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the HostAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. 
A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "blood_press_diast": { "description": "Resting diastolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "blood_press_syst": { "description": "Resting systolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. 
In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; except for 2008-01 and 2008, all of these are ISO 8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. 
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 
9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene" ], "title": "MimarksSHostAssociated", "type": "object" }, "MimarksSHumanAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the HumanAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amniotic_fluid_color": { "description": "Specification of the color of the amniotic fluid sample", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "blood_blood_disord": { "description": "History of blood disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, hematopoietic system disease (https://disease-ontology.org/?id=DOID:74)", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; except for 2008-01 and 2008, all of these are ISO 8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diet_last_six_month": { "description": "Specification of major diet changes in the last six months, if yes the change should be specified", "type": "string" }, "drug_usage": { "description": "Any drug used by subject and the frequency of usage; can include multiple drugs used", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. 
https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "foetal_health_stat": { "description": "Specification of foetal health status, should also include abortion", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gestation_state": { "description": "Specification of the gestation state", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. 
Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_hiv_stat": { "description": "HIV status of subject, if yes HAART initiation status should also be indicated as [YES or NO]", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "kidney_disord": { "description": "History of kidney disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, kidney disease (https://disease-ontology.org/?id=DOID:557)", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "maternal_health_stat": { "description": "Specification of the maternal health status", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_throat_disord": { "description": "History of nose-throat disorders; can include multiple disorders, The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850), upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pet_farm_animal": { "description": "Specification of presence of pets or farm animals in the environment of subject, if yes the animals should be specified; can include multiple animals present", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "pulmonary_disord": { "description": "History of pulmonary disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850)", "items": { "type": "string" }, "type": "array" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). 
Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "smoker": { "description": "Specification of smoking status", "type": "boolean" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "study_complt_stat": { "description": "Specification of study completion status, if no the reason should be specified", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. 
identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "travel_out_six_month": { "description": "Specification of the countries travelled in the last six months; can include multiple travels", "items": { "type": "string" }, "type": "array" }, "twin_sibling": { "description": "Specification of twin sibling presence", "type": "boolean" }, "urine_collect_meth": { "$ref": "#/$defs/URINECOLLECTMETHENUM", "description": "Specification of urine collection method" }, "urogenit_tract_disor": { "description": "History of urogenital tract disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" }, "weight_loss_3_month": { "description": "Specification of weight loss in the last three months, if yes should be further specified to include amount of weight loss", "type": "string" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene" ], "title": "MimarksSHumanAssociated", "type": "object" }, "MimarksSHumanGut": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the HumanGut Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. 
In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. 
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "gastrointest_disord": { "description": "History of gastrointestinal tract disorders; can include multiple disorders. History of blood disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, gastrointestinal system disease (https://disease-ontology.org/?id=DOID:77)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "liver_disord": { "description": "History of liver disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, liver disease (https://disease-ontology.org/?id=DOID:409)", "items": { "type": "string" }, "type": "array" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "special_diet": { "description": "Specification of special diet; can include multiple special diets", "items": { "type": "string" }, "type": "array" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene" ], "title": "MimarksSHumanGut", "type": "object" }, "MimarksSHumanOral": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the HumanOral Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_mouth_teeth_throat_disord": { "description": "History of nose/mouth/teeth/throat disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, nose disease (https://disease-ontology.org/?id=DOID:2825), mouth disease (https://disease-ontology.org/?id=DOID:403), tooth disease (https://disease-ontology.org/?id=DOID:1091), or upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. 
qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. 
This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_last_toothbrush": { "description": "Specification of the time since last toothbrushing", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene" ], "title": "MimarksSHumanOral", "type": "object" }, "MimarksSHumanSkin": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the HumanSkin Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. 
For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dermatology_disord": { "description": "History of dermatology disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, skin disease (https://disease-ontology.org/?id=DOID:37)", "items": { "type": "string" }, "type": "array" }, "dominant_hand": { "$ref": "#/$defs/DOMINANTHANDENUM", "description": "Dominant hand of the subject" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. 
https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). 
Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_since_last_wash": { "description": "Specification of the time since last wash", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene" ], "title": "MimarksSHumanSkin", "type": "object" }, "MimarksSHumanVaginal": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the HumanVaginal Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "birth_control": { "description": "Specification of birth control medication used", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "douche": { "description": "Date of most recent douche", "format": "date-time", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. 
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gynecologic_disord": { "description": "History of gynecological disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, female reproductive system disease (https://disease-ontology.org/?id=DOID:229)", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, meditteranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hrt": { "description": "Whether subject had hormone replacement therapy, and if yes start date", "format": "date-time", "type": "string" }, "hysterectomy": { "description": "Specification of whether hysterectomy was performed", "type": "boolean" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "menarche": { "description": "Date of most recent menstruation", "format": "date-time", "type": "string" }, "menopause": { "description": "Date of onset of menopause", "format": "date-time", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pregnancy": { "description": "Date due of pregnancy", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. 
More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sexual_act": { "description": "Current sexual partner and frequency of sex", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "urogenit_disord": { "description": "History of urogenital disorders, can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, reproductive system disease (https://disease-ontology.org/?id=DOID:15) or urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene" ], "title": "MimarksSHumanVaginal", "type": "object" }, "MimarksSHydrocarbonResourcesCores": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the HydrocarbonResourcesCores Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. 
Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. 
A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. Mic)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g.
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. 
This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. 
(Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "owc_tvdss": { "description": "Depth of the original oil water contact (OWC) zone (average) (m TVDSS)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "permeability": { "description": "Measure of the ability of a hydrocarbon resource to allow fluids to pass through it. 
(Additional information: https://en.wikipedia.org/wiki/Permeability_(earth_sciences))", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "reservoir": { "description": "Name of the reservoir (e.g.
Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_md": { "description": "In non deviated well, measured depth is equal to the true vertical depth, TVD (TVD=TVDSS plus the reference or datum it refers to). In deviated wells, the MD is the length of trajectory of the borehole measured from the same reference or datum. Common datums used are ground level (GL), drilling rig floor (DF), rotary table (RT), kelly bushing (KB) and mean sea level (MSL). If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_tvdss": { "description": "Depth of the sample i.e. The vertical distance between the sea level and the sampled position in the subsurface. Depth can be reported as an interval for subsurface samples e.g. 1325.75-1362.25 m", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. 
For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. 
ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sr_dep_env": { "$ref": "#/$defs/SRDEPENVENUM", "description": "Source rock depositional environment (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of source rock (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_kerog_type": { "$ref": "#/$defs/SRKEROGTYPEENUM", "description": "Origin of kerogen. Type I: Algal (aquatic), Type II: planktonic and soft plant material (aquatic or terrestrial), Type III: terrestrial woody/ fibrous plant material (terrestrial), Type IV: oxidized recycled woody debris (terrestrial) (additional information: https://en.wikipedia.org/wiki/Kerogen). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_lithology": { "$ref": "#/$defs/SRLITHOLOGYENUM", "description": "Lithology of source rock (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene", "hcr", "hc_produced", "basin", "hcr_temp", "sulfate_fw", "vfa_fw", "samp_type", "api" ], "title": "MimarksSHydrocarbonResourcesCores", "type": "object" }, "MimarksSHydrocarbonResourcesFluidsSwabs": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the HydrocarbonResourcesFluidsSwabs Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "add_recov_method": { "description": "Additional (i.e. Secondary, tertiary, etc.) 
recovery methods deployed for increase of hydrocarbon recovery from resource and start date for each one of them. If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide": { "description": "List of biocides (commercial name of product and supplier) and date of administration", "type": "string" }, "biocide_admin_method": { "description": "Method of biocide administration (dose, frequency, duration, time elapsed between last biociding and sampling) (e.g. 150 mg/l; weekly; 4 hr; 3 days)", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_treat_method": { "description": "Method of chemical administration(dose, frequency, duration, time elapsed between administration and sampling) (e.g. 50 mg/l; twice a week; 1 hr; 0 days)", "type": "string" }, "chem_treatment": { "description": "List of chemical compounds administered upstream the sampling location where sampling occurred (e.g. Glycols, H2S scavenger, corrosion and scale inhibitors, demulsifiers, and other production chemicals etc.). The commercial name of the product and name of the supplier should be provided. The date of administration should also be included", "type": "string" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. 
In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. Mic)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). 
We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "iw_bt_date_well": { "description": "Injection water breakthrough date per well following a secondary and/or tertiary recovery", "format": "date-time", "type": "string" }, "iwf": { "description": "Proportion of the produced fluids derived from injected water at the time of sampling. (e.g. 87%)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. 
(Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. 
In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_rate": { "description": "Oil and/or gas production rates per well (e.g. 524 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_start_date": { "description": "Date of field's first production", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_point": { "$ref": "#/$defs/SAMPCOLLECTPOINTENUM", "description": "Sampling point on the asset where the sample was collected (e.g. Wellhead, storage tank, separator, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_loc_corr_rate": { "description": "Metal corrosion rate is the speed of metal deterioration due to environmental conditions. As environmental conditions change corrosion rates change accordingly. 
Therefore, long term corrosion rates are generally more informative than short term rates and for that reason they are preferred during reporting. In the case of suspected MIC, corrosion rate measurements at the time of sampling might provide insights into the involvement of certain microbial community members in MIC as well as potential microbial interplays", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_preserv": { "description": "Preservative added to the sample (e.g. Rnalater, alcohol, formaldehyde, etc.). Where appropriate include volume added (e.g. Rnalater; 2 ml)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. 
This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). 
Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "water_cut": { "description": "Current amount of water (%) in a produced fluid stream; or the average of the combined streams", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_prod_rate": { "description": "Water production rates per well (e.g. 987 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene", "hcr", "hc_produced", "basin", "water_cut", "iwf", "add_recov_method", "samp_type", "samp_collect_point", "sulfate", "sulfide", "nitrate", "api" ], "title": "MimarksSHydrocarbonResourcesFluidsSwabs", "type": "object" }, "MimarksSMicrobialMatBiofilm": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the MicrobialMatBiofilm Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. Maybe be a single taxon or mixed taxa sample. Use 'synthetic metagenome for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene" ], "title": "MimarksSMicrobialMatBiofilm", "type": "object" }, "MimarksSMiscellaneousNaturalOrArtificialEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the MiscellaneousNaturalOrArtificialEnvironment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. 
It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . 
Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. 
The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. 
This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). 
Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene" ], "title": "MimarksSMiscellaneousNaturalOrArtificialEnvironment", "type": "object" }, "MimarksSPlantAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the PlantAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp_regm": { "description": "Information about treatment involving an exposure to varying temperatures; should include the temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include different temperature regimens", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. 
A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "antibiotic_regm": { "description": "Information about treatment involving antibiotic administration; should include the name of antibiotic, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple antibiotic regimens", "items": { "type": "string" }, "type": "array" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_mutagen": { "description": "Treatment involving use of mutagens; should include the name of mutagen, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mutagen regimens", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cult_root_med": { "description": "Name or reference for the hydroponic or in vitro culture rooting medium; can be the name of a commonly used medium or reference to a specific medium, e.g. Murashige and Skoog medium. 
If the medium has not been formally published, use the rooting medium descriptors", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. 
UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "fertilizer_regm": { "description": "Information about treatment involving the use of fertilizers; should include the name of fertilizer, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fertilizer regimens", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravity": { "description": "Information about treatment involving use of gravity factor to study various types of responses in presence, absence or modified levels of gravity; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple treatments", "items": { "type": "string" }, "type": "array" }, "growth_facil": { "description": "Type of facility where the sampled plant was grown; controlled vocabulary: growth chamber, open top chamber, glasshouse, experimental garden, field. Alternatively use Crop Ontology (CO) terms, see http://www.cropontology.org/ontology/CO_715/Crop%20Research", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_hormone_regm": { "description": "Information about treatment involving use of growth hormones; should include the name of growth hormone, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple growth hormone regimens", "items": { "type": "string" }, "type": "array" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", 
"items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_wet_mass": { "description": "Measurement of wet mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity_regm": { "description": "Information about treatment involving an exposure to varying degrees of humidity; should include amount of humidity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_regm": { "description": "Information about treatment(s) involving exposure to light, including both light intensity and quality", "type": "string" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "mineral_nutr_regm": { "description": "Information about treatment involving the use of mineral supplements; should include the name of mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of pH of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_sex": { "$ref": "#/$defs/PLANTSEXENUM", "description": "Sex of the reproductive parts on the whole plant, e.g. pistillate, staminate, monoecious, hermaphrodite" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. 
petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "radiation_regm": { "description": "Information about treatment involving exposure of plant or a plant part to a particular radiation regimen; should include the radiation type, amount or intensity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple radiation regimens", "items": { "type": "string" }, "type": "array" }, "rainfall_regm": { "description": "Information about treatment involving an exposure to a given amount of rainfall, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N, P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "root_med_regl": { "description": "Growth regulators in the culture rooting medium such as cytokinins, auxins, gibberellins, abscisic acid; e.g. 0.5 mg/L NAA", "type": "string" }, "root_med_solid": { "description": "Specification of the solidifying agent in the culture rooting medium; e.g. agar", "type": "string" }, "root_med_suppl": { "description": "Organic supplements of the culture rooting medium, such as vitamins, amino acids, organic acids, antibiotics, activated charcoal; e.g. nicotinic acid (0.5 mg/L)", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. 
inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. 
May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). 
Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. 
identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tiss_cult_growth_med": { "description": "Description of plant tissue culture growth media used", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene" ], "title": "MimarksSPlantAssociated", "type": "object" }, "MimarksSSediment": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the Sediment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "particle_class": { "description": "Particles are classified, based on their size, into six general categories:clay, silt, sand, gravel, cobbles, and boulders; should include amount of particle preceded by the name of the particle type; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. 
It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sediment_type": { "$ref": "#/$defs/SEDIMENTTYPEENUM", "description": "Information about the sediment type based on major constituents" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. 
identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene" ], "title": "MimarksSSediment", "type": "object" }, "MimarksSSoil": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the Soil Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "al_sat": { "description": "Aluminum saturation (esp. For tropical soils)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "al_sat_meth": { "description": "Reference or method used in determining Al saturation", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. 
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals": { "description": "Heavy metals present in the sequenced sample and their concentrations. 
For multiple heavy metals and concentrations, add multiple copies of this field", "items": { "type": "string" }, "type": "array" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "link_addit_analys": { "description": "Link to additional analysis results performed on the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", 
"type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. 
This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected; sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. 
Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). 
Applied only for sequences that are not submitted to SRA, ENA or DRA" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. 
ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_cont_soil_meth": { "description": "Reference or method used in determining the water content of soil", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "elev", "target_gene" ], "title": "MimarksSSoil", "type": "object" }, "MimarksSSymbiontAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the SymbiontAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "association_duration": { "description": "Time spent in host of the symbiotic organism at the time of sampling; relevant scale depends on symbiotic organism and study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. 
In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. 
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_cellular_loc": { "$ref": "#/$defs/HOSTCELLULARLOCENUM", "description": "The localization of the symbiotic host organism within the host from which it was sampled: e.g. 
intracellular if the symbiotic host organism is localized within the cells or extracellular if the symbiotic host organism is localized outside of cells" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_dependence": { "$ref": "#/$defs/HOSTDEPENDENCEENUM", "description": "Type of host dependence for the symbiotic host organism to its host" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_infra_spec_name": { "description": "Taxonomic information about the host below subspecies level", "type": "string" }, "host_infra_spec_rank": { "description": "Taxonomic rank information about the host below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_number": { "description": "Number of symbiotic host individuals pooled at the time of collection", "type": "string" }, "host_of_host_coinf": { "description": "The taxonomic name of any coinfecting organism observed in a symbiotic relationship with the host of the sampled host organism. e.g. where a sample collected from a host trematode species (A) which was collected from a host_of_host fish (B) that was also infected with a nematode (C), the value here would be (C) the nematode {species name} or {common name}. Multiple co-infecting species may be added in a comma-separated list. For listing symbiotic organisms associated with the host (A) use the term Observed host symbiont", "type": "string" }, "host_of_host_disease": { "description": "List of diseases with which the host of the symbiotic host organism has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_loc": { "description": "For a symbiotic host organism the local anatomical environment within its host may have causal influences. Report the anatomical entity(s) which are in the direct environment of the symbiotic host organism being sampled and which you believe have significant causal influences on your sample or specimen. 
For example, if the symbiotic host organism being sampled is an intestinal worm, its local environmental context will be the term for intestine from UBERON (http://uberon.github.io/)", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_med": { "description": "Report the environmental material(s) immediately surrounding the symbiotic host organism at the time of sampling. This usually will be a tissue or substance type from the host, but may be another material if the symbiont is external to the host. We recommend using classes from the UBERON ontology, but subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483) may also be used. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
intestines, heart)", "type": "string" }, "host_of_host_fam_rel": { "description": "Familial relationship of the host of the symbiotic host organisms to other hosts of symbiotic host organism in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_of_host_geno": { "description": "Observed genotype of the host of the symbiotic host organism", "type": "string" }, "host_of_host_gravid": { "description": "Whether or not the host of the symbiotic host organism is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_of_host_infname": { "description": "Taxonomic name information of the host of the symbiotic host organism below subspecies level", "type": "string" }, "host_of_host_infrank": { "description": "Taxonomic rank information about the host of the symbiotic host organism below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_of_host_name": { "description": "Common name of the host of the symbiotic host organism", "type": "string" }, "host_of_host_pheno": { "description": "Phenotype of the host of the symbiotic host organism. For phenotypic quality ontology (PATO) terms, see http://purl.bioontology.org/ontology/pato", "type": "string" }, "host_of_host_sub_id": { "description": "A unique identifier by which each host of the symbiotic host organism subject can be referred to, de-identified, e.g. #H14", "type": "string" }, "host_of_host_taxid": { "description": "NCBI taxon id of the host of the symbiotic host organism", "type": "string" }, "host_of_host_totmass": { "description": "Total mass of the host of the symbiotic host organism at collection, the unit depends on the host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_specificity": { "$ref": "#/$defs/HOSTSPECIFICITYENUM", "description": "Level of specificity of symbiont-host interaction: e.g. generalist (symbiont able to establish associations with distantly related hosts) or species-specific" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "mode_transmission": { "$ref": "#/$defs/MODETRANSMISSIONENUM", "description": "The process through which the symbiotic host organism entered the host from which it was sampled" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { 
"description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "route_transmission": { "$ref": "#/$defs/ROUTETRANSMISSIONENUM", "description": "Description of path taken by the symbiotic host organism being sampled in order to establish a symbiotic relationship with the host (with which it was observed at the time of sampling) via a mode of transmission (specified in mode_transmission)" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored, written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_sol": { "description": "Solution within which sample was stored, if any", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. 
by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA, ENA or DRA" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14', referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sym_life_cycle_type": { "$ref": "#/$defs/SYMLIFECYCLETYPEENUM", "description": "Type of life cycle of the symbiotic host species (the thing being sampled). 
Simple life cycles occur within a single host, complex ones within multiple different hosts over the course of their normal life cycle" }, "symbiont_host_role": { "$ref": "#/$defs/SYMBIONTHOSTROLEENUM", "description": "Role of the host in the life cycle of the symbiotic organism" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type_of_symbiosis": { "$ref": "#/$defs/TYPEOFSYMBIOSISENUM", "description": "Type of biological interaction established between the symbiotic host organism being sampled and its respective host" }, "urobiom_sex": { "$ref": "#/$defs/UROBIOMSEXENUM", "description": "Physical sex of the host" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene", "host_dependence", "sym_life_cycle_type", "host_life_stage" ], "title": "MimarksSSymbiontAssociated", "type": "object" }, "MimarksSWastewaterSludge": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the WastewaterSludge Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biochem_oxygen_dem": { "description": "Amount of dissolved oxygen needed by aerobic biological organisms in a body of water to break down organic material present in a given water sample at certain temperature over a specific time period", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_oxygen_dem": { "description": "A measure of the capacity of water to consume oxygen during the decomposition of organic matter and the oxidation of inorganic chemicals such as ammonia and nitrite", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "efficiency_percent": { "description": "Percentage of volatile solids removed from the anaerobic digestor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "emulsions": { "description": "Amount or concentration of substances such as paints, adhesives, mayonnaise, hair colorants, emulsified oils, etc.; can include multiple emulsion types", "items": { "type": "string" }, "type": "array" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "gaseous_substances": { "description": "Amount or concentration of substances such as hydrogen sulfide, carbon dioxide, methane, etc.; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "indust_eff_percent": { "description": "Percentage of industrial effluents received by wastewater treatment plant", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "inorg_particles": { "description": "Concentration of particles such as sand, grit, metal particles, ceramics, etc.; can include multiple particles", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "org_particles": { "description": "Concentration of particles such as faeces, hairs, food, vomit, paper fibers, plant material, humus, etc", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pre_treatment": { "description": "The process of pre-treatment removes materials that can be easily collected from the raw wastewater", "type": "string" }, "primary_treatment": { "description": "The process to produce both a generally homogeneous liquid capable of being treated biologically and a sludge that can be separately treated or processed", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reactor_type": { "description": "Anaerobic digesters can be designed and engineered to operate using a number of different process configurations, as batch or continuous, mesophilic, high solid or low solid, and single stage or multistage", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "secondary_treatment": { "description": "The process for substantially degrading the biological content of the sewage", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA, ENA or DRA" }, "sewage_type": { "description": "Type of wastewater treatment plant as municipal or industrial", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sludge_retent_time": { "description": "The time activated sludge remains in reactor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_inorg_mat": { "description": "Concentration of substances such as ammonia, road-salt, sea-salt, cyanide, hydrogen sulfide, thiocyanates, thiosulfates, etc", "items": { "type": "string" }, "type": "array" }, "soluble_org_mat": { "description": "Concentration of substances such as urea, fruit sugars, soluble proteins, drugs, pharmaceuticals, etc", "items": { "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tertiary_treatment": { "description": "The process providing a final treatment stage to raise the effluent quality before it is discharged to the receiving environment", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wastewater_type": { "description": "The origin of wastewater such as human waste, rainfall, storm drains, etc", "type": "string" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene" ], "title": "MimarksSWastewaterSludge", "type": "object" }, "MimarksSWater": { "additionalProperties": false, "description": "MIxS data that complies with the MimarksS checklist and the Water Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "atmospheric_data": { "description": "Measurement of atmospheric data; can include multiple data", "items": { "type": "string" }, "type": "array" }, "bac_prod": { "description": "Bacterial production in the water column measured by isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bac_resp": { "description": "Measurement of bacterial respiration in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. 
Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_nitro": { "description": "Concentration of dissolved inorganic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "down_par": { "description": "Visible waveband radiance and irradiance measurements in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "fluor": { "description": "Raw or converted fluorescence of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_intensity": { "description": "Measurement of light intensity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "part_org_nitro": { "description": "Concentration of particulate organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "photon_flux": { "description": "Measurement of photon flux", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "primary_prod": { "description": "Measurement of primary production, generally measured as isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). 
Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_react_phosp": { "description": "Concentration of soluble reactive phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_part_matter": { "description": "Concentration of suspended particulate matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_diss_nitro": { "description": "Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_inorg_nitro": { "description": "Total inorganic nitrogen content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_part_carb": { "description": "Total particulate carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "env_broad_scale", "project_name", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "target_gene" ], "title": "MimarksSWater", "type": "object" }, "MimsAgriculture": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the Agriculture Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. 
This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. 
A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. 
This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON:03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; information about treatment involving use of growth hormones; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer 
or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area 
of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of pH of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "photosynt_activ": { "description": "Measurement of photosynthetic activity (i.e. leaf gas exchange / chlorophyll fluorescence emissions / reflectance / transpiration). Please also include the method term detailing the method of activity measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "photosynt_activ_meth": { "description": "Reference or method used in measurement of photosynthetic activity", "items": { "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. 
under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored, written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. 
Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_location": { "description": "The location the sequencing run was performed. 
Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. 
This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of some soil.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_pH": { "description": "pH of some soil.", "type": "number" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. 
This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. 
chemolithotroph)" }, "water_pH": { "description": "The pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_shared": { "description": "Other users sharing access to the same water source. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "lib_screen", "nucl_acid_amp", "mid", "assembly_name", "temp", "nucl_acid_ext", "samp_size", "lib_reads_seqd", "samp_collect_device", "project_name", "lib_vector", "adapters", "env_local_scale", "samp_mat_process", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "chem_administration", "food_source", "specific_host", "pathogenicity", "samp_store_dur", "samp_store_loc", "host_age", "host_common_name", "host_genotype", "host_height", "host_length", "host_life_stage", "host_phenotype", "host_taxid", "host_tot_mass", "host_spec_range", "pcr_primers", "target_gene", "target_subfragment", "pcr_cond", "chimera_check", "soil_type", "soil_type_meth", "store_cond", "microbial_biomass", "micro_biomass_meth", "sieving", "pool_dna_extracts" ], "title": "MimsAgriculture", "type": "object" }, "MimsAir": { "additionalProperties": 
false, "description": "MIxS data that complies with the Mims checklist and the Air Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. 
Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "barometric_press": { "description": "Force per unit area exerted against a surface by the weight of air above that surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_monoxide": { "description": "Carbon monoxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "oxygen": { "description": "Oxygen (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pollutants": { "description": "Pollutant types and, amount or concentrations measured at the time of sampling; can report multiple pollutants by entering numeric values preceded by name of pollutant", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, 
anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. 
INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. 
ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "volatile_org_comp": { "description": "Concentration of carbon-based chemicals that easily evaporate at room temperature; can report multiple volatile organic compounds by entering numeric values preceded by name of compound", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "alt", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimsAir", "type": "object" }, "MimsBuiltEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the BuiltEnvironment Extension", "properties": { "abs_air_humidity": { "description": "Actual mass of water vapor - mh20 - present in the air water vapor mixture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "address": { "description": "The street name and building number where the sampling occurred", "type": "string" }, "adj_room": { "description": "List of rooms (room number, room name) immediately adjacent to the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "aero_struc": { "$ref": "#/$defs/AEROSTRUCENUM", "description": "Aerospace structures typically consist of thin plates with stiffeners for the external surfaces, bulkheads and frames to support the shape and fasteners such as welds, rivets, screws and bolts to hold the components together" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. 
It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amount_light": { "description": "The unit of illuminance and luminous emittance, measuring luminous flux per unit area", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "arch_struc": { "$ref": "#/$defs/ARCHSTRUCENUM", "description": "An architectural structure is a human-made, free-standing, immobile outdoor construction" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_dew_point": { "description": "The average of dew point measures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "avg_temp": { "description": "The average of temperatures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bathroom_count": { "description": "The number of bathrooms in the building", "type": "integer" }, "bedroom_count": { "description": "The number of bedrooms in the building", "type": "integer" }, "build_docs": { "$ref": "#/$defs/BUILDDOCSENUM", "description": "The building design, construction and operation documents" }, "build_occup_type": { "description": "The primary function for which a building or discrete part of a building is intended to be used", "items": { "$ref": "#/$defs/BUILDOCCUPTYPEENUM" }, "type": "array" }, "building_setting": { "$ref": "#/$defs/BUILDINGSETTINGENUM", "description": "A location (geography) where a building is set" }, "built_struc_age": { "description": "The age of the built structure since construction", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "built_struc_set": { "$ref": "#/$defs/BUILTSTRUCSETENUM", "description": "The characterization of the location of the built structure as high or low human density" }, "built_struc_type": { "description": "A physical structure that is a body or assemblage of bodies in space to form a system capable of supporting loads", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_area": { "description": "The area of the ceiling space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the ceiling at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "ceil_finish_mat": { "$ref": "#/$defs/CEILFINISHMATENUM", "description": "The type of material used to finish a ceiling" }, "ceil_struc": { "$ref": "#/$defs/CEILSTRUCENUM", "description": "The construction format of the ceiling" }, "ceil_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a ceiling surface" }, "ceil_thermal_mass": { "description": "The ability of the ceiling to provide inertia against temperature fluctuations. Generally this means concrete that is exposed. A metal deck that supports a concrete slab will act thermally as long as it is exposed to room air flow", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_type": { "$ref": "#/$defs/CEILTYPEENUM", "description": "The type of ceiling according to the ceiling's appearance or construction" }, "ceil_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the ceiling" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cool_syst_id": { "description": "The cooling system identifier", "type": "integer" }, "date_last_rain": { "description": "The date of the last time it rained", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. 
For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dew_point": { "description": "The temperature to which a given parcel of humid air must be cooled, at constant barometric pressure, for water vapor to condense into water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_comp_type": { "$ref": "#/$defs/DOORCOMPTYPEENUM", "description": "The composite type of the door" }, "door_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the door" }, "door_direct": { "$ref": "#/$defs/DOORDIRECTENUM", "description": "The direction the door opens" }, "door_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the door in the room" }, "door_mat": { "$ref": "#/$defs/DOORMATENUM", "description": "The material the door is composed of" }, "door_move": { "$ref": "#/$defs/DOORMOVEENUM", "description": "The type of movement of the door" }, "door_size": { "description": "The size of the door", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_type": { "$ref": "#/$defs/DOORTYPEENUM", "description": "The type of door material" }, "door_type_metal": { "$ref": "#/$defs/DOORTYPEMETALENUM", "description": "The type of metal door" }, "door_type_wood": { "description": "The type of wood door", "type": "string" }, "door_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a door" }, "drawings": { "$ref": "#/$defs/DRAWINGSENUM", "description": "The building's architectural drawings; if design is chosen, indicate phase-conceptual, schematic, design development, and construction documents" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elevator": { "description": "The number of elevators within the built structure", "type": "integer" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "escalator": { "description": "The number of escalators within the built structure", "type": "integer" }, "exp_duct": { "description": "The amount of exposed ductwork in the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "exp_pipe": { "description": "The number of exposed pipes in the room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "ext_door": { "description": "The number of exterior doors in the built structure", "type": "integer" }, "ext_wall_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The orientation of the exterior wall" }, "ext_window_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The compass direction the exterior window of the room is facing" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "filter_type": { "description": "A device which removes solid particulates or airborne molecular contaminants", "items": { "$ref": "#/$defs/FILTERTYPEENUM" }, "type": "array" }, "fireplace_type": { "$ref": "#/$defs/FIREPLACETYPEENUM", "description": "A firebox with chimney" }, "floor_age": { "description": "The time period since installment of the carpet or flooring", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_area": { "description": "The area of the floor space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the floor at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "floor_count": { "description": "The number of floors in the building, including basements and mechanical penthouse", "type": "integer" }, "floor_finish_mat": { "description": "The floor covering type; the finished surface that is walked on", "type": "string" }, "floor_struc": { "$ref": "#/$defs/FLOORSTRUCENUM", "description": "Refers to the structural elements and subfloor upon which the finish flooring is installed" }, "floor_thermal_mass": { "description": "The ability of the floor to provide inertia against temperature fluctuations", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_water_mold": { "$ref": "#/$defs/FLOORWATERMOLDENUM", "description": "Signs of the presence of mold or mildew in a room" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "freq_cook": { "description": "The number of times a meal is cooked per week", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "furniture": { "$ref": "#/$defs/FURNITUREENUM", "description": "The types of furniture present in the sampled room" }, "gender_restroom": { "$ref": "#/$defs/GENDERRESTROOMENUM", "description": "The gender type of the restroom" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hall_count": { "description": "The total count of hallways and corridors in the built structure", "type": "integer" }, "handidness": { "$ref": "#/$defs/HANDIDNESSENUM", "description": "The handedness of the individual sampled" }, "heat_cool_type": { "description": "Methods of conditioning or heating a room or building", "items": { "$ref": "#/$defs/HEATCOOLTYPEENUM" }, "type": "array" }, "heat_deliv_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The location of heat delivery within the room" }, "heat_sys_deliv_meth": { "$ref": "#/$defs/HEATSYSDELIVMETHENUM", "description": "The method by which the heat is delivered through the system" }, "heat_system_id": { "description": "The heating system identifier", "type": "integer" }, "height_carper_fiber": { "description": "The average carpet fiber height in the indoor environment", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "indoor_space": { "$ref": "#/$defs/INDOORSPACEENUM", "description": "A distinguishable space within a structure, the purpose for which discrete areas of a building are used" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "inside_lux": { "description": "The recorded value at sampling time (power density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "int_wall_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the wall at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "last_clean": { "description": "The last time the floor was cleaned (swept, mopped, vacuumed)", "format": "date-time", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_type": { "description": "Application of light to achieve some practical or aesthetic effect. Lighting includes the use of both artificial light sources such as lamps and light fixtures, as well as natural illumination by capturing daylight. 
Can also include absence of light", "items": { "$ref": "#/$defs/LIGHTTYPEENUM" }, "type": "array" }, "max_occup": { "description": "The maximum amount of people allowed in the indoor environment", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mech_struc": { "$ref": "#/$defs/MECHSTRUCENUM", "description": "mechanical structure: a moving structure" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "number_pets": { "description": "The number of pets residing in the sampled space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_plants": { "description": "The number of plant(s) in 
the sampling space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_resident": { "description": "The number of individuals currently occupying in the sampling location", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "occup_density_samp": { "description": "Average number of occupants at time of sampling per square footage", "type": "number" }, "occup_document": { "$ref": "#/$defs/OCCUPDOCUMENTENUM", "description": "The type of documentation of occupancy" }, "occup_samp": { "description": "Number of occupants present at time of sample within the given space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "quad_pos": { "$ref": "#/$defs/QUADPOSENUM", "description": "The quadrant position of the sampling room within the building" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_air_humidity": { "description": "Partial vapor and air pressure, density of the vapor and air, or by the actual mass of the vapor and air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "rel_humidity_out": { "description": "The recorded outside relative humidity value at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_samp_loc": { "$ref": "#/$defs/RELSAMPLOCENUM", "description": "The sampling location within the train car" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "room_air_exch_rate": { "description": "The rate at which outside air replaces indoor air in a given space", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_architec_elem": { "description": "The unique details and component parts that, together, form the architecture of a distinguishable space within a built structure", "type": "string" }, "room_condt": { "$ref": "#/$defs/ROOMCONDTENUM", "description": "The condition of the room at the time of sampling" }, "room_connected": { "$ref": "#/$defs/ROOMCONNECTEDENUM", "description": "List of rooms connected to the sampling room by a doorway" }, "room_count": { "description": "The total count of rooms in the built structure including all room types", "type": "integer" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "room_door_dist": { "description": "Distance between doors (meters) in the hallway between the sampling room and adjacent rooms", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_door_share": { "description": "List of room(s) (room number, room name) sharing a door with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_hallway": { "description": "List of room(s) (room number, room name) located in the same hallway as sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_loc": { "$ref": "#/$defs/ROOMLOCENUM", "description": "The position of the room within the building" }, "room_moist_dam_hist": { "description": "The history of moisture damage or mold in the past 12 months. 
Number of events of moisture damage or mold observed", "type": "integer" }, "room_net_area": { "description": "The net floor area of sampling room. Net area excludes wall thicknesses", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_occup": { "description": "Count of room occupancy at time of sampling", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_samp_pos": { "$ref": "#/$defs/ROOMSAMPPOSENUM", "description": "The horizontal sampling position in the room relative to architectural elements" }, "room_type": { "description": "The main purpose or activity of the sampling room. A room is any distinguishable space within a structure", "type": "string" }, "room_vol": { "description": "Volume of sampling room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_wall_share": { "description": "List of room(s) (room number, room name) sharing a wall with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_window_count": { "description": "Number of windows in the room", "type": "integer" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_sort_meth": { "description": "Method by which samples are sorted; open face filter collecting total suspended particles, prefilter to remove particles larger than X micrometers in diameter, where common values of X would be 10 and 2.5 full size sorting in a cascade impactor", "items": { "type": "string" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_time_out": { "description": "The recent and long term history of outside sampling", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_weather": { "$ref": "#/$defs/SAMPWEATHERENUM", "description": "The weather on the sampling day" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_use": { "$ref": "#/$defs/SEASONUSEENUM", "description": "The seasons the space is occupied" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "shad_dev_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the shading device" }, "shading_device_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the shading device at the time of sampling" }, "shading_device_loc": { "$ref": "#/$defs/SHADINGDEVICELOCENUM", "description": "The location of the shading device in relation to the built structure" }, "shading_device_mat": { "description": "The material the shading device is composed of", "type": "string" }, "shading_device_type": { "$ref": "#/$defs/SHADINGDEVICETYPEENUM", "description": "The type of shading device" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "space_typ_state": { "$ref": "#/$defs/SPACETYPSTATEENUM", "description": "Customary or normal state of the space" }, "specific": { "$ref": "#/$defs/SPECIFICENUM", "description": "The building specifications. If design is chosen, indicate phase: conceptual, schematic, design development, construction documents" }, "specific_humidity": { "description": "The mass of water vapour in a unit mass of moist air, usually expressed as grams of vapour per kilogram of air, or, in air conditioning, as grains per pound", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "substructure_type": { "description": "The substructure or under building is that largely hidden section of the building which is built off the foundations to the ground floor level", "items": { "$ref": "#/$defs/SUBSTRUCTURETYPEENUM" }, "type": "array" }, "surf_air_cont": { "description": "Contaminant identified on surface", "items": { "$ref": "#/$defs/SURFAIRCONTENUM" }, "type": "array" }, "surf_humidity": { "description": "Surfaces: water activity as a function of air and material moisture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "surf_moisture": { "description": "Water held on a surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "surf_moisture_ph": { "description": "ph measurement of surface", "type": "number" }, "surf_temp": { "description": "Temperature of the surface at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp_out": { "description": "The recorded temperature value at sampling time outside", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "train_line": { "$ref": "#/$defs/TRAINLINEENUM", "description": "The subway line name" }, "train_stat_loc": { "$ref": "#/$defs/TRAINSTATLOCENUM", "description": "The train station collection location" }, "train_stop_loc": { "$ref": "#/$defs/TRAINSTOPLOCENUM", "description": "The train stop collection location" }, "typ_occup_density": { "description": "Customary or normal density of occupants", "type": "number" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "vis_media": { "description": "The building visual media", "type": "string" }, "wall_area": { "description": "The total area of the sampled room's walls", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_const_type": { "$ref": "#/$defs/WALLCONSTTYPEENUM", "description": "The building class of the wall defined by the composition of the building elements and fire-resistance rating" }, "wall_finish_mat": { "$ref": "#/$defs/WALLFINISHMATENUM", "description": "The material utilized to finish the outer most layer of the wall" }, "wall_height": { "description": "The average height of the walls in the sampled room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the wall within the room" }, "wall_surf_treatment": { "$ref": "#/$defs/WALLSURFTREATMENTENUM", "description": "The surface treatment of interior wall" }, "wall_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a wall surface" }, "wall_thermal_mass": { "description": "The ability of the wall to provide inertia against temperature fluctuations. 
Generally this means concrete or concrete block that is either exposed or covered only with paint", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a wall" }, "water_feat_size": { "description": "The size of the water feature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_feat_type": { "$ref": "#/$defs/WATERFEATTYPEENUM", "description": "The type of water feature present within the building being sampled" }, "weekday": { "$ref": "#/$defs/WEEKDAYENUM", "description": "The day of the week when sampling occurred" }, "window_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the window at the time of sampling" }, "window_cover": { "$ref": "#/$defs/WINDOWCOVERENUM", "description": "The type of window covering" }, "window_horiz_pos": { "$ref": "#/$defs/WINDOWHORIZPOSENUM", "description": "The horizontal position of the window on the wall" }, "window_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the window within the room" }, "window_mat": { "$ref": "#/$defs/WINDOWMATENUM", "description": "The type of material used to finish a window" }, "window_open_freq": { "description": "The number of times windows are opened per week", "type": "integer" }, "window_size": { "description": "The window's length and width", "type": "string" }, "window_status": { "$ref": "#/$defs/WINDOWSTATUSENUM", "description": "Defines whether the windows were open or closed during environmental testing" }, "window_type": { "$ref": "#/$defs/WINDOWTYPEENUM", "description": "The type of windows" }, "window_vert_pos": { "$ref": "#/$defs/WINDOWVERTPOSENUM", "description": "The vertical position of 
the window on the wall" }, "window_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the window" } }, "required": [ "samp_name", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "surf_material", "rel_air_humidity", "abs_air_humidity", "air_temp", "build_occup_type", "carb_dioxide", "ventilation_type", "organism_count", "indoor_space", "indoor_surf", "filter_type", "heat_cool_type", "building_setting", "light_type", "space_typ_state", "typ_occup_density", "occup_samp", "occup_density_samp" ], "title": "MimsBuiltEnvironment", "type": "object" }, "MimsFoodAnimalAndAnimalFeed": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the FoodAnimalAndAnimalFeed Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). 
Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "animal_am": { "description": "The name(s) (generic or brand) of the antimicrobial(s) given to the food animal within the last 30 days", "type": "string" }, "animal_am_dur": { "description": "The duration of time (days) that the antimicrobial was administered to the food animal", "type": "string" }, "animal_am_freq": { "description": "The frequency per day that the antimicrobial was adminstered to the food animal", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "animal_am_route": { "description": "The route by which the antimicrobial is administered into the body of the food animal", "type": "string" }, "animal_am_use": { "description": "The prescribed intended use of or the condition treated by the antimicrobial given to the food animal by any route of administration", "type": "string" }, "animal_body_cond": { "$ref": "#/$defs/ANIMALBODYCONDENUM", "description": "Body condition scoring is a production management tool used to evaluate overall health and nutritional needs of a food animal. Because there are different scoring systems, this field is restricted to three categories" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes |. Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock.
This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_sex": { "$ref": "#/$defs/ANIMALSEXENUM", "description": "The sex and reproductive status of the food animal" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection.
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen.
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. 
This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. 
http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). 
If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997).
If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. Unless used on infant formula, this date is not a reflection of safety; it is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_source_age": { "description": "The age of the food source host organism. Depending on the type of host organism, age may be more appropriate to report in days, weeks, or years", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064).
If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed.
This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. 
This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, 
volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO).
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. 
Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. 
Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "food_origin", "food_prod", "food_product_type", "IFSAC_category", "intended_consumer", "samp_purpose" ], "title": "MimsFoodAnimalAndAnimalFeed", "type": "object" }, "MimsFoodFarmEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the FoodFarmEnvironment Extension", "properties": { "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "air_flow_impede": { "description": "Presence of objects in the area that would influence or impede air flow through the air filter", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes |Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_intrusion": { "description": "Identification of animals intruding on the sample or sample site including invertebrates (such as pests or pollinators) and vertebrates (such as wildlife or domesticated animals). 
This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "date_extr_weath": { "description": "Date of unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "format": "date-time", "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO s biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen s local vicinity and which you believe have significant causal influences on your sample or specimen. 
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extr_weather_event": { "description": "Unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "$ref": "#/$defs/EXTRWEATHEREVENTENUM" }, "type": "array" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. 
This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_san_freq": { "description": "The number of times farm equipment is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually", "type": "string" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). 
Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fertilizer_date": { "description": "Date of administration of soil amendment or fertilizer. Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "format": "date-time", "type": "string" }, "food_clean_proc": { "$ref": "#/$defs/FOODCLEANPROCENUM", "description": "The process of cleaning food to separate other environmental materials from the food source. Multiple terms can be separated by pipes" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. 
The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. 
This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_medium": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). The name of the medium used to grow the microorganism", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product 
within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of 
any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. 
Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_part_maturity": { "description": "A description of the stage of development of a plant or plant part based on maturity or ripeness. This field accepts terms listed under degree of plant maturity (http://purl.obolibrary.org/obo/FOODON_03530050)", "type": "string" }, "plant_reprod_crop": { "description": "Plant reproductive part used in the field during planting to start the crop", "items": { "$ref": "#/$defs/PLANTREPRODCROPENUM" }, "type": "array" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free. 
Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. 
KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. 
It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. 
-80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. 
This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. 
Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of soil at time of sampling.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. 
This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_pH": { "description": "The pH of soil at time of sampling.", "type": "number" }, "soil_porosity": { "description": "Porosity of soil or deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_class": { "$ref": "#/$defs/SOILTEXTURECLASSENUM", "description": "One of the 12 soil texture classes use to describe soil texture based on the relative proportion of different grain sizes of mineral particles [sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um)] in a soil" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) 
should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "water_frequency": { "description": "Number of water delivery events within a given period of time", "type": "string" }, "water_pH": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_adjac": { "description": "Description of the environmental features that are adjacent to the farm water source. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "water_source_shared": { "description": "Other users sharing access to the same water source. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "biotic_regm", "chem_administration", "food_product_type", "samp_type", "IFSAC_category" ], "title": "MimsFoodFarmEnvironment", "type": "object" }, "MimsFoodFoodProductionFacility": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the FoodFoodProductionFacility Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products). 
An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "area_samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. 
For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_occup": { "description": "Daily average occupancy of room. 
Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide_used": { "description": "Substance intended for preventing, neutralizing, destroying, repelling, or mitigating the effects of any pest or microorganism; that inhibits the growth, reproduction, and activity of organisms, including fungal cells; decreases the number of fungi or pests present; deters microbial growth and degradation of other ingredients in the formulation. Indicate the biocide used on the location where the sample was taken. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). 
Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. 
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_monitoring_zone": { "description": "An environmental monitoring zone is a formal designation as part of an environmental monitoring program, in which areas of a food production facility are categorized, commonly as zones 1-4, based on likelihood or risk of foodborne pathogen contamination. This field accepts terms listed under food production environmental monitoring zone (http://purl.obolibrary.org/obo/ENVO). Please add a term to indicate the environmental monitoring zone the sample was taken from", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "facility_type": { "description": "Establishment details about the type of facility where the sample was taken. This is independent of the specific product(s) within the facility", "items": { "$ref": "#/$defs/FACILITYTYPEENUM" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. 
This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. 
http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. 
This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). 
Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality; this date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. 
This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hygienic_area": { "description": "The subdivision of areas within a food production facility according to hygienic requirements. This field accepts terms listed under hygienic food production area (http://purl.obolibrary.org/obo/ENVO). 
Please add a term that most accurately indicates the hygienic area your sample was taken from according to the definitions provided", "type": "string" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. 
The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": "string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free. Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_loc_condition": { "$ref": "#/$defs/SAMPLOCCONDITIONENUM", "description": "The condition of the sample location at the time of sampling" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from the environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. 
Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. 
If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_surf_moisture": { "description": "Degree of water held on a sampled surface. If present, user can state the degree of water held on surface (intermittent moisture, submerged). If no surface moisture is present indicate not present", "items": { "$ref": "#/$defs/SAMPSURFMOISTUREENUM" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. 
Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. 
Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spec_intended_cons": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "ster_meth_samp_room": { "description": "The method used to sterilize the sampling room. This field accepts terms listed under electromagnetic radiation (http://purl.obolibrary.org/obo/ENVO_01001026). If the proper descriptor is not listed, please use text to describe the sampling room sterilization method. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "samp_source_mat_cat", "samp_type", "samp_stor_media", "samp_stor_device", "food_product_type", "IFSAC_category", "food_product_qual", "food_contact_surf" ], "title": "MimsFoodFoodProductionFacility", "type": "object" }, "MimsFoodHumanFoods": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the FoodHumanFoods Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). 
Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products). An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. 
Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. 
This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result 
of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_chem_add": { "description": "Any chemicals that are added to the fermentation process to achieve the desired final product", "items": { "type": "string" }, "type": "array" }, "ferm_chem_add_perc": { "description": "The amount of chemical added to the fermentation process", "items": { "type": "number" }, "type": "array" }, "ferm_headspace_oxy": { "description": "The amount of headspace oxygen in a fermentation vessel", "type": "number" }, "ferm_medium": { "description": "The growth medium used for the fermented food fermentation 
process, which supplies the required nutrients. Usually this includes a carbon and nitrogen source, water, micronutrients and chemical additives", "type": "string" }, "ferm_pH": { "description": "The pH of the fermented food fermentation process", "type": "number" }, "ferm_rel_humidity": { "description": "The relative humidity of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_temp": { "description": "The temperature of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_time": { "description": "The time duration of the fermented food fermentation process", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "ferm_vessel": { "description": "The type of vessel used for containment of the fermentation", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. 
This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. 
http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. 
This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). 
Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON:03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality, this date is not a reflection of safety unless used on infant formula this date is not a reflection of safety and is typically labeled on a food product as \"best if used by,\" best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descrptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. 
This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descrptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This fields accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "microb_start": { "description": "Any type of microorganisms used in food production. This field accepts terms listed under live organisms for food production (http://purl.obolibrary.org/obo/FOODON_0344453)", "type": "string" }, "microb_start_count": { "description": "Total cell count of starter culture per gram, volume or area of sample and the method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example : total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "microb_start_inoc": { "description": "The amount of starter culture used to inoculate a new batch", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "microb_start_prep": { "description": "Information about the protocol or method used to prepare the starter inoculum", "type": "string" }, "microb_start_source": { "description": "The source from which the microbial starter culture was sourced. If commercially supplied, list supplier", "type": "string" }, "microb_start_taxID": { "description": "Please include Genus species and strain ID, if known of microorganisms used in food production. For complex communities, pipes can be used to separate two or more microbes", "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": "string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. Maybe be a single taxon or mixed taxa sample. Use 'synthetic metagenome for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Conatiner in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. 
Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. 
Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "food_product_type", "IFSAC_category" ], "title": "MimsFoodHumanFoods", "type": "object" }, "MimsHostAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the HostAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. 
A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "blood_press_diast": { "description": "Resting diastolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "blood_press_syst": { "description": "Resting systolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. 
specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, meditteranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 
9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimsHostAssociated", "type": "object" }, "MimsHumanAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the HumanAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amniotic_fluid_color": { "description": "Specification of the color of the amniotic fluid sample", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "blood_blood_disord": { "description": "History of blood disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, hematopoietic system disease (https://disease-ontology.org/?id=DOID:74)", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diet_last_six_month": { "description": "Specification of major diet changes in the last six months, if yes the change should be specified", "type": "string" }, "drug_usage": { "description": "Any drug used by subject and the frequency of usage; can include multiple drugs used", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "foetal_health_stat": { "description": "Specification of foetal health status, should also include abortion", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gestation_state": { "description": "Specification of the gestation state", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. 
Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_hiv_stat": { "description": "HIV status of subject, if yes HAART initiation status should also be indicated as [YES or NO]", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "kidney_disord": { "description": "History of kidney disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, kidney disease (https://disease-ontology.org/?id=DOID:557)", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "maternal_health_stat": { "description": "Specification of the maternal health status", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_throat_disord": { "description": "History of nose-throat disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850), upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. 
The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pet_farm_animal": { "description": "Specification of presence of pets or farm animals in the environment of subject, if yes the animals should be specified; can include multiple animals present", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "pulmonary_disord": { "description": "History of pulmonary disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850)", "items": { "type": "string" }, "type": "array" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "smoker": { "description": "Specification of smoking status", "type": "boolean" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "study_complt_stat": { "description": "Specification of study completion status, if no the reason should be specified", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "travel_out_six_month": { "description": "Specification of the countries travelled in the last six months; can include multiple travels", "items": { "type": "string" }, "type": "array" }, "twin_sibling": { "description": "Specification of twin sibling presence", "type": "boolean" }, "urine_collect_meth": { "$ref": "#/$defs/URINECOLLECTMETHENUM", "description": "Specification of urine collection method" }, "urogenit_tract_disor": { "description": "History of urogenital tract disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" }, "weight_loss_3_month": { "description": "Specification of weight loss in the last three months, if yes should be further specified to include amount of weight loss", "type": "string" } }, "required": [ "samp_name", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimsHumanAssociated", "type": "object" }, "MimsHumanGut": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the HumanGut Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . 
Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gastrointest_disord": { "description": "History of gastrointestinal tract disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, gastrointestinal system disease (https://disease-ontology.org/?id=DOID:77)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "liver_disord": { "description": "History of liver disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, liver disease (https://disease-ontology.org/?id=DOID:409)", "items": { "type": "string" }, "type": "array" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from the environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to samp_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored, written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degrees Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "special_diet": { "description": "Specification of special diet; can include multiple special diets", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimsHumanGut", "type": "object" }, "MimsHumanOral": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the HumanOral Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS .
Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. 
could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung, etc.). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses.
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_mouth_teeth_throat_disord": { "description": "History of nose/mouth/teeth/throat disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, nose disease (https://disease-ontology.org/?id=DOID:2825), mouth disease (https://disease-ontology.org/?id=DOID:403), tooth disease (https://disease-ontology.org/?id=DOID:1091), or upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any 
organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. 
While salinity can be measured by a complete chemical analysis, this method is difficult and time-consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from the environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique.
Use of a globally unique identifier for the field source_mat_id is recommended in addition to samp_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored, written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degrees Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_last_toothbrush": { "description": "Specification of the time since last toothbrushing", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimsHumanOral", "type": "object" }, "MimsHumanSkin": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the HumanSkin Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dermatology_disord": { "description": "History of dermatology disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, skin disease (https://disease-ontology.org/?id=DOID:37)", "items": { "type": "string" }, "type": "array" }, "dominant_hand": { "$ref": "#/$defs/DOMINANTHANDENUM", "description": "Dominant hand of the subject" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_since_last_wash": { "description": "Specification of the time since last wash", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" } }, "required": [ "samp_name", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimsHumanSkin", "type": "object" }, "MimsHumanVaginal": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the HumanVaginal Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "birth_control": { "description": "Specification of birth control medication used", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "douche": { "description": "Date of most recent douche", "format": "date-time", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. 
We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gynecologic_disord": { "description": "History of gynecological disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, female reproductive system disease (https://disease-ontology.org/?id=DOID:229)", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, meditteranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hrt": { "description": "Whether subject had hormone replacement therapy, and if yes start date", "format": "date-time", "type": "string" }, "hysterectomy": { "description": "Specification of whether hysterectomy was performed", "type": "boolean" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude.
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "menarche": { "description": "Date of most recent menstruation", "format": "date-time", "type": "string" }, "menopause": { "description": "Date of onset of menopause", "format": "date-time", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pregnancy": { "description": "Date due of pregnancy", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored, written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sexual_act": { "description": "Current sexual partner and frequency of sex", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "urogenit_disord": { "description": "History of urogenital disorders, can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, reproductive system disease (https://disease-ontology.org/?id=DOID:15) or urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimsHumanVaginal", "type": "object" }, "MimsHydrocarbonResourcesCores": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the HydrocarbonResourcesCores Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. Mic)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428.
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner.
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. 
Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. (Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "owc_tvdss": { "description": "Depth of the original oil water contact (OWC) zone (average) (m TVDSS)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "permeability": { "description": "Measure of the ability of a hydrocarbon resource to allow fluids to pass through it. (Additional information: https://en.wikipedia.org/wiki/Permeability_(earth_sciences))", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. 
More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_md": { "description": "In non deviated well, measured depth is equal to the true vertical depth, TVD (TVD=TVDSS plus the reference or datum it refers to). In deviated wells, the MD is the length of trajectory of the borehole measured from the same reference or datum. Common datums used are ground level (GL), drilling rig floor (DF), rotary table (RT), kelly bushing (KB) and mean sea level (MSL). If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. 
It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. 
Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_tvdss": { "description": "Depth of the sample i.e. the vertical distance between the sea level and the sampled position in the subsurface. Depth can be reported as an interval for subsurface samples e.g. 1325.75-1362.25 m", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sr_dep_env": { "$ref": "#/$defs/SRDEPENVENUM", "description": "Source rock depositional environment (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of source rock (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_kerog_type": { "$ref": "#/$defs/SRKEROGTYPEENUM", "description": "Origin of kerogen. Type I: Algal (aquatic), Type II: planktonic and soft plant material (aquatic or terrestrial), Type III: terrestrial woody/ fibrous plant material (terrestrial), Type IV: oxidized recycled woody debris (terrestrial) (additional information: https://en.wikipedia.org/wiki/Kerogen). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_lithology": { "$ref": "#/$defs/SRLITHOLOGYENUM", "description": "Lithology of source rock (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "hcr", "hc_produced", "basin", "hcr_temp", "sulfate_fw", "vfa_fw", "samp_type", "api" ], "title": "MimsHydrocarbonResourcesCores", "type": "object" }, "MimsHydrocarbonResourcesFluidsSwabs": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the HydrocarbonResourcesFluidsSwabs Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "add_recov_method": { "description": "Additional (i.e. Secondary, tertiary, etc.) recovery methods deployed for increase of hydrocarbon recovery from resource and start date for each one of them. If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. 
For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide": { "description": "List of biocides (commercial name of product and supplier) and date of administration", "type": "string" }, "biocide_admin_method": { "description": "Method of biocide administration (dose, frequency, duration, time elapsed between last biociding and sampling) (e.g. 150 mg/l; weekly; 4 hr; 3 days)", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_treat_method": { "description": "Method of chemical administration(dose, frequency, duration, time elapsed between administration and sampling) (e.g. 50 mg/l; twice a week; 1 hr; 0 days)", "type": "string" }, "chem_treatment": { "description": "List of chemical compounds administered upstream the sampling location where sampling occurred (e.g. Glycols, H2S scavenger, corrosion and scale inhibitors, demulsifiers, and other production chemicals etc.). The commercial name of the product and name of the supplier should be provided. The date of administration should also be included", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. Mic)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. 
Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "iw_bt_date_well": { "description": "Injection water breakthrough date per well following a secondary and/or tertiary recovery", "format": "date-time", "type": "string" }, "iwf": { "description": "Proportion of the produced fluids derived from injected water at the time of sampling. (e.g. 87%)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling 
conditions should also be provided. (Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (soure: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_rate": { "description": "Oil and/or gas production rates per well (e.g. 524 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_start_date": { "description": "Date of field's first production", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_point": { "$ref": "#/$defs/SAMPCOLLECTPOINTENUM", "description": "Sampling point on the asset where sample was collected (e.g. Wellhead, storage tank, separator, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_loc_corr_rate": { "description": "Metal corrosion rate is the speed of metal deterioration due to environmental conditions. As environmental conditions change corrosion rates change accordingly. 
Therefore, long term corrosion rates are generally more informative than short term rates and for that reason they are preferred during reporting. In the case of suspected MIC, corrosion rate measurements at the time of sampling might provide insights into the involvement of certain microbial community members in MIC as well as potential microbial interplays", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_preserv": { "description": "Preservative added to the sample (e.g. Rnalater, alcohol, formaldehyde, etc.). Where appropriate include volume added (e.g. Rnalater; 2 ml)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. 
This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 
1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "water_cut": { "description": "Current amount of water (%) in a produced fluid stream; or the average of the combined streams", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_prod_rate": { "description": "Water production rates per well (e.g. 987 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "hcr", "hc_produced", "basin", "water_cut", "iwf", "add_recov_method", "samp_type", "samp_collect_point", "sulfate", "sulfide", "nitrate", "api" ], "title": "MimsHydrocarbonResourcesFluidsSwabs", "type": "object" }, "MimsMicrobialMatBiofilm": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the MicrobialMatBiofilm Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. 
High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483).
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique.
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. 
However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimsMicrobialMatBiofilm", "type": "object" }, "MimsMiscellaneousNaturalOrArtificialEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the MiscellaneousNaturalOrArtificialEnvironment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. 
High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimsMiscellaneousNaturalOrArtificialEnvironment", "type": "object" }, "MimsPlantAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the PlantAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp_regm": { "description": "Information about treatment involving an exposure to varying temperatures; should include the temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include different temperature regimens", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. 
A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "antibiotic_regm": { "description": "Information about treatment involving antibiotic administration; should include the name of antibiotic, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple antibiotic regimens", "items": { "type": "string" }, "type": "array" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. 
High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_mutagen": { "description": "Treatment involving use of mutagens; should include the name of mutagen, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mutagen regimens", "items": { "type": "string" }, "type": "array" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "cult_root_med": { "description": "Name or reference for the hydroponic or in vitro culture rooting medium; can be the name of a commonly used medium or reference to a specific medium, e.g. Murashige and Skoog medium. If the medium has not been formally published, use the rooting medium descriptors", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_regm": { "description": "Information about treatment involving the use of fertilizers; should include the name of fertilizer, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fertilizer regimens", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name 
of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravity": { "description": "Information about treatment involving use of gravity factor to study various types of responses in presence, absence or modified levels of gravity; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple treatments", "items": { "type": "string" }, "type": "array" }, "growth_facil": { "description": "Type of facility where the sampled plant was grown; controlled vocabulary: growth chamber, open top chamber, glasshouse, experimental garden, field. 
Alternatively use Crop Ontology (CO) terms, see http://www.cropontology.org/ontology/CO_715/Crop%20Research", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_hormone_regm": { "description": "Information about treatment involving use of growth hormones; should include the name of growth hormone, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple growth hormone regimens", "items": { "type": "string" }, "type": "array" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_wet_mass": { "description": "Measurement of wet mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity_regm": { "description": "Information about treatment involving an exposure to varying degree of humidity; should include amount of humidity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude.
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_regm": { "description": "Information about treatment(s) involving exposure to light, including both light intensity and quality", "type": "string" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "mineral_nutr_regm": { "description": "Information about treatment involving the use of mineral supplements; should include the name of mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of ph of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimen", "items": { "type": "string" }, "type": "array" }, 
"plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_sex": { "$ref": "#/$defs/PLANTSEXENUM", "description": "Sex of the reproductive parts on the whole plant, e.g. pistillate, staminate, monoecious, hermaphrodite" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "radiation_regm": { "description": "Information about treatment involving exposure of plant or a plant part to a particular radiation regimen; should include the radiation type, amount or intensity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple radiation regimens", "items": { "type": "string" }, "type": "array" }, "rainfall_regm": { "description": "Information about treatment involving an exposure to a given amount of rainfall, treatment
regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "root_med_regl": { "description": "Growth regulators in the culture rooting medium such as cytokinins, auxins, gibberellins, abscisic acid; e.g.
0.5 mg/L NAA", "type": "string" }, "root_med_solid": { "description": "Specification of the solidifying agent in the culture rooting medium; e.g. agar", "type": "string" }, "root_med_suppl": { "description": "Organic supplements of the culture rooting medium, such as vitamins, amino acids, organic acids, antibiotics, activated charcoal; e.g. nicotinic acid (0.5 mg/L)", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO).
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tiss_cult_growth_med": { "description": "Description of plant tissue culture growth media used", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "project_name", 
"env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimsPlantAssociated", "type": "object" }, "MimsSediment": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the Sediment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field.
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "particle_class": { "description": "Particles are classified, based on their size, into six general categories: clay, silt, sand, gravel, cobbles, and boulders; should include amount of particle preceded by the name of the particle type; can include multiple values", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sediment_type": { "$ref": "#/$defs/SEDIMENTTYPEENUM", "description": "Information about the sediment type based on major constituents" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimsSediment", "type": "object" }, "MimsSoil": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the Soil Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "al_sat": { "description": "Aluminum saturation (esp. For tropical soils)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "al_sat_meth": { "description": "Reference or method used in determining Al saturation", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. 
For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft: Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft: Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft: Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling >= 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e.
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. 
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner.
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals": { "description": "Heavy metals present in the sequenced sample and their concentrations. 
For multiple heavy metals and concentrations, add multiple copies of this field", "items": { "type": "string" }, "type": "array" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "link_addit_analys": { "description": "Link to additional analysis results performed on the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", 
"type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where the sample was collected, i.e. the sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", 
"type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe or an anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from the environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_cont_soil_meth": { "description": "Reference or method used in determining the water content of soil", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "elev", "env_broad_scale" ], "title": "MimsSoil", "type": "object" }, "MimsSymbiontAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the SymbiontAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "association_duration": { "description": "Time spent in host of the symbiotic organism at the time of sampling; relevant scale depends on symbiotic organism and study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_cellular_loc": { "$ref": "#/$defs/HOSTCELLULARLOCENUM", "description": "The localization of the symbiotic host organism within the host from which it was sampled: e.g. 
intracellular if the symbiotic host organism is localized within the cells or extracellular if the symbiotic host organism is localized outside of cells" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_dependence": { "$ref": "#/$defs/HOSTDEPENDENCEENUM", "description": "Type of host dependence for the symbiotic host organism to its host" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_infra_spec_name": { "description": "Taxonomic information about the host below subspecies level", "type": "string" }, "host_infra_spec_rank": { "description": "Taxonomic rank information about the host below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_number": { "description": "Number of symbiotic host individuals pooled at the time of collection", "type": "string" }, "host_of_host_coinf": { "description": "The taxonomic name of any coinfecting organism observed in a symbiotic relationship with the host of the sampled host organism. e.g. where a sample collected from a host trematode species (A) which was collected from a host_of_host fish (B) that was also infected with a nematode (C), the value here would be (C) the nematode {species name} or {common name}. Multiple co-infecting species may be added in a comma-separated list. For listing symbiotic organisms associated with the host (A) use the term Observed host symbiont", "type": "string" }, "host_of_host_disease": { "description": "List of diseases with which the host of the symbiotic host organism has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_loc": { "description": "For a symbiotic host organism the local anatomical environment within its host may have causal influences. Report the anatomical entity(s) which are in the direct environment of the symbiotic host organism being sampled and which you believe have significant causal influences on your sample or specimen. 
For example, if the symbiotic host organism being sampled is an intestinal worm, its local environmental context will be the term for intestine from UBERON (http://uberon.github.io/)", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_med": { "description": "Report the environmental material(s) immediately surrounding the symbiotic host organism at the time of sampling. This usually will be a tissue or substance type from the host, but may be another material if the symbiont is external to the host. We recommend using classes from the UBERON ontology, but subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483) may also be used. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
intestines, heart)", "type": "string" }, "host_of_host_fam_rel": { "description": "Familial relationship of the host of the symbiotic host organisms to other hosts of symbiotic host organism in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_of_host_geno": { "description": "Observed genotype of the host of the symbiotic host organism", "type": "string" }, "host_of_host_gravid": { "description": "Whether or not the host of the symbiotic host organism is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_of_host_infname": { "description": "Taxonomic name information of the host of the symbiotic host organism below subspecies level", "type": "string" }, "host_of_host_infrank": { "description": "Taxonomic rank information about the host of the symbiotic host organism below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_of_host_name": { "description": "Common name of the host of the symbiotic host organism", "type": "string" }, "host_of_host_pheno": { "description": "Phenotype of the host of the symbiotic host organism. For phenotypic quality ontology (PATO) terms, see http://purl.bioontology.org/ontology/pato", "type": "string" }, "host_of_host_sub_id": { "description": "A unique identifier by which each host of the symbiotic host organism subject can be referred to, de-identified, e.g. #H14", "type": "string" }, "host_of_host_taxid": { "description": "NCBI taxon id of the host of the symbiotic host organism", "type": "string" }, "host_of_host_totmass": { "description": "Total mass of the host of the symbiotic host organism at collection, the unit depends on the host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_specificity": { "$ref": "#/$defs/HOSTSPECIFICITYENUM", "description": "Level of specificity of symbiont-host interaction: e.g. generalist (symbiont able to establish associations with distantly related hosts) or species-specific" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "mode_transmission": { "$ref": "#/$defs/MODETRANSMISSIONENUM", "description": "The process through which the symbiotic host organism entered the host from which it was sampled" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { 
"description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "route_transmission": { "$ref": "#/$defs/ROUTETRANSMISSIONENUM", "description": "Description of path taken by the symbiotic host organism being sampled in order to establish a symbiotic relationship with the host (with which it was observed at the time of sampling) via a mode of transmission (specified in mode_transmission)" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_sol": { "description": "Solution within which sample was stored, if any", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sym_life_cycle_type": { "$ref": "#/$defs/SYMLIFECYCLETYPEENUM", "description": "Type of life cycle of the symbiotic host species (the thing being sampled). 
Simple life cycles occur within a single host, complex ones within multiple different hosts over the course of their normal life cycle" }, "symbiont_host_role": { "$ref": "#/$defs/SYMBIONTHOSTROLEENUM", "description": "Role of the host in the life cycle of the symbiotic organism" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type_of_symbiosis": { "$ref": "#/$defs/TYPEOFSYMBIOSISENUM", "description": "Type of biological interaction established between the symbiotic host organism being sampled and its respective host" }, "urobiom_sex": { "$ref": "#/$defs/UROBIOMSEXENUM", "description": "Physical sex of the host" } }, "required": [ "samp_name", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "host_dependence", "sym_life_cycle_type", "host_life_stage" ], "title": "MimsSymbiontAssociated", "type": "object" }, "MimsWastewaterSludge": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the WastewaterSludge Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biochem_oxygen_dem": { "description": "Amount of dissolved oxygen needed by aerobic biological organisms in a body of water to break down organic material present in a given water sample at certain temperature over a specific time period", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_oxygen_dem": { "description": "A measure of the capacity of water to consume oxygen during the decomposition of organic matter and the oxidation of inorganic chemicals such as ammonia and nitrite", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "efficiency_percent": { "description": "Percentage of volatile solids removed from the anaerobic digestor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "emulsions": { "description": "Amount or concentration of substances such as paints, adhesives, mayonnaise, hair colorants, emulsified oils, etc.; can include multiple emulsion types", "items": { "type": "string" }, "type": "array" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. 
We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gaseous_substances": { "description": "Amount or concentration of substances such as hydrogen sulfide, carbon dioxide, methane, etc.; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "indust_eff_percent": { "description": "Percentage of industrial effluents received by wastewater treatment plant", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "inorg_particles": { "description": "Concentration of particles such as sand, grit, metal particles, ceramics, etc.; can include multiple particles", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_particles": { "description": "Concentration of particles such as faeces, hairs, food, vomit, paper fibers, plant material, humus, etc", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. 
The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pre_treatment": { "description": "The process of pre-treatment removes materials that can be easily collected from the raw wastewater", "type": "string" }, "primary_treatment": { "description": "The process to produce both a generally homogeneous liquid capable of being treated biologically and a sludge that can be separately treated or processed", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reactor_type": { "description": "Anaerobic digesters can be designed and engineered to operate using a number of different process configurations, as batch or continuous, mesophilic, high solid or low solid, and single stage or multistage", "type": "string" 
}, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "secondary_treatment": { "description": "The process for substantially degrading the biological content of the sewage", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sewage_type": { "description": "Type of wastewater treatment plant as municipal or industrial", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sludge_retent_time": { "description": "The time activated sludge remains in reactor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_inorg_mat": { "description": "Concentration of substances such as ammonia, road-salt, sea-salt, cyanide, hydrogen sulfide, thiocyanates, thiosulfates, etc", "items": { "type": "string" }, "type": "array" }, "soluble_org_mat": { "description": "Concentration of substances such as urea, fruit sugars, soluble proteins, drugs, pharmaceuticals, etc", "items": { "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tertiary_treatment": { "description": "The process providing a final treatment stage to raise the effluent quality before it is discharged to the receiving environment", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wastewater_type": { "description": "The origin of wastewater such as human waste, rainfall, storm drains, etc", "type": "string" } }, "required": [ "samp_name", "project_name", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimsWastewaterSludge", "type": "object" }, "MimsWater": { "additionalProperties": false, "description": "MIxS data that complies with the Mims checklist and the Water Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. 
High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "atmospheric_data": { "description": "Measurement of atmospheric data; can include multiple data", "items": { "type": "string" }, "type": "array" }, "bac_prod": { "description": "Bacterial production in the water column measured by isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bac_resp": { "description": "Measurement of bacterial respiration in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. 
Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_nitro": { "description": "Concentration of dissolved inorganic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "down_par": { "description": "Visible waveband radiance and irradiance measurements in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483).
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fluor": { "description": "Raw or converted fluorescence of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_intensity": { "description": "Measurement of light intensity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "part_org_nitro": { "description": "Concentration of particulate organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "photon_flux": { "description": "Measurement of photon flux", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "primary_prod": { "description": "Measurement of primary production, generally measured as isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to samp_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_react_phosp": { "description": "Concentration of soluble reactive phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_part_matter": { "description": "Concentration of suspended particulate matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_diss_nitro": { "description": "Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_inorg_nitro": { "description": "Total inorganic nitrogen content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_part_carb": { "description": "Total particulate carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MimsWater", "type": "object" }, "MisagAgriculture": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the Agriculture Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. 
- amount and time of applications", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts termed listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. 
A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. 
The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. 
For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. 
This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). 
If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; information about treatment involving use of growth hormones; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer 
or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area 
of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of pH of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "photosynt_activ": { "description": "Measurement of photosynthetic activity (i.e. leaf gas exchange / chlorophyll fluorescence emissions / reflectance / transpiration). Please also include the method term detailing the method of activity measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "photosynt_activ_meth": { "description": "Reference or method used in measurement of photosynthetic activity", "items": { "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil relative to other parts of the farm e.g. 
under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from the environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to samp_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degrees Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. 
This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). 
Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. 
The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of some soil.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_pH": { "description": "pH of some soil.", "type": "number" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. 
The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. 
identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "water_pH": { "description": "The pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_shared": { "description": "Other users sharing access to the same water source. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "lib_screen", "nucl_acid_amp", "mid", "assembly_name", "temp", "compl_score", "nucl_acid_ext", "samp_size", "lib_reads_seqd", "samp_collect_device", "assembly_qual", "project_name", "lib_vector", "adapters", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "samp_mat_process", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "chem_administration", "food_source", "specific_host", "pathogenicity", 
"samp_store_dur", "samp_store_loc", "host_age", "host_common_name", "host_genotype", "host_height", "host_length", "host_life_stage", "host_phenotype", "host_taxid", "host_tot_mass", "host_spec_range", "pcr_primers", "target_gene", "target_subfragment", "pcr_cond", "chimera_check", "soil_type", "soil_type_meth", "store_cond", "microbial_biomass", "micro_biomass_meth", "sieving", "pool_dna_extracts" ], "title": "MisagAgriculture", "type": "object" }, "MisagAir": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the Air Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "barometric_press": { "description": "Force per unit area exerted against a surface by the weight of air above that surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_monoxide": { "description": "Carbon monoxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. 
kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "oxygen": { "description": "Oxygen (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pollutants": { "description": "Pollutant types and, amount or concentrations measured at the time of sampling; can report multiple pollutants by entering numeric values preceded by name of pollutant", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, 
anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. 
INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "volatile_org_comp": { "description": "Concentration of carbon-based chemicals that easily evaporate at room temperature; can report multiple volatile organic compounds by entering numeric values preceded by name of compound", "items": { "type": "string" }, "type": "array" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "alt", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MisagAir", "type": "object" }, "MisagBuiltEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the BuiltEnvironment Extension", "properties": { "abs_air_humidity": { "description": "Actual mass of water vapor - mh20 - present in the air water vapor mixture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "address": { "description": "The street name and building number where the sampling occurred", "type": "string" }, "adj_room": { "description": "List of rooms (room number, room name) immediately adjacent to the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "aero_struc": { "$ref": "#/$defs/AEROSTRUCENUM", "description": "Aerospace structures typically consist of thin plates with stiffeners for the external surfaces, bulkheads and frames to support the shape and fasteners such as welds, rivets, screws and bolts to hold the components together" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amount_light": { "description": "The unit of illuminance and luminous emittance, measuring luminous flux per unit area", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "arch_struc": { "$ref": "#/$defs/ARCHSTRUCENUM", "description": "An architectural structure is a human-made, free-standing, immobile outdoor construction" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_dew_point": { "description": "The average of dew point measures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "avg_temp": { "description": "The average of temperatures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bathroom_count": { "description": "The number of bathrooms in the building", "type": "integer" }, "bedroom_count": { "description": "The number of bedrooms in the building", "type": "integer" }, "build_docs": { "$ref": "#/$defs/BUILDDOCSENUM", "description": "The building design, construction and operation documents" }, "build_occup_type": { "description": "The primary function for which a building or discrete part of a building is intended to be used", "items": { "$ref": "#/$defs/BUILDOCCUPTYPEENUM" }, "type": "array" }, "building_setting": { "$ref": "#/$defs/BUILDINGSETTINGENUM", "description": "A location (geography) where a building is set" }, "built_struc_age": { "description": "The age of the built structure since construction", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "built_struc_set": { "$ref": "#/$defs/BUILTSTRUCSETENUM", "description": "The characterization of the location of the built structure as high or low human density" }, "built_struc_type": { "description": "A physical structure that is a body or assemblage of bodies in space to form a system capable of supporting loads", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_area": { "description": "The area of the ceiling space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the ceiling at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "ceil_finish_mat": { "$ref": "#/$defs/CEILFINISHMATENUM", "description": "The type of material used to finish a ceiling" }, "ceil_struc": { "$ref": "#/$defs/CEILSTRUCENUM", "description": "The construction format of the ceiling" }, "ceil_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a ceiling surface" }, "ceil_thermal_mass": { "description": "The ability of the ceiling to provide inertia against temperature fluctuations. Generally this means concrete that is exposed. A metal deck that supports a concrete slab will act thermally as long as it is exposed to room air flow", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_type": { "$ref": "#/$defs/CEILTYPEENUM", "description": "The type of ceiling according to the ceiling's appearance or construction" }, "ceil_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the ceiling" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. 
For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "cool_syst_id": { "description": "The cooling system identifier", "type": "integer" }, "date_last_rain": { "description": "The date of the last time it rained", "format": "date-time", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dew_point": { "description": "The temperature to which a given parcel of humid air must be cooled, at constant barometric pressure, for water vapor to condense into water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_comp_type": { "$ref": "#/$defs/DOORCOMPTYPEENUM", "description": "The composite type of the door" }, "door_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the door" }, "door_direct": { "$ref": "#/$defs/DOORDIRECTENUM", "description": "The direction the door opens" }, "door_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the door in the room" }, "door_mat": { "$ref": "#/$defs/DOORMATENUM", "description": "The material the door is composed of" }, "door_move": { "$ref": "#/$defs/DOORMOVEENUM", "description": "The type of movement of the door" }, "door_size": { "description": "The size of the door", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_type": { "$ref": "#/$defs/DOORTYPEENUM", "description": "The type of door material" }, "door_type_metal": { "$ref": "#/$defs/DOORTYPEMETALENUM", "description": "The type of metal door" }, "door_type_wood": { "description": "The type of wood door", "type": "string" }, "door_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a door" }, "drawings": { "$ref": "#/$defs/DRAWINGSENUM", "description": "The building's architectural drawings; if design is chosen, indicate phase-conceptual, schematic, design development, and construction documents" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elevator": { "description": "The number of elevators within the built structure", "type": "integer" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen s local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "escalator": { "description": "The number of escalators within the built structure", "type": "integer" }, "exp_duct": { "description": "The amount of exposed ductwork in the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "exp_pipe": { "description": "The number of exposed pipes in the room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "ext_door": { "description": "The number of exterior doors in the built structure", "type": "integer" }, "ext_wall_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The orientation of the exterior wall" }, "ext_window_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The compass direction the exterior window of the room is facing" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "filter_type": { "description": "A device which removes solid particulates or airborne molecular contaminants", "items": { "$ref": "#/$defs/FILTERTYPEENUM" }, "type": "array" }, "fireplace_type": { "$ref": "#/$defs/FIREPLACETYPEENUM", "description": "A firebox with chimney" }, "floor_age": { "description": "The time period since installment of the carpet or flooring", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_area": { "description": "The area of the floor space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the floor at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "floor_count": { "description": "The number of floors in the building, including basements and mechanical penthouse", "type": "integer" }, "floor_finish_mat": { "description": "The floor covering type; the finished surface that is walked on", "type": "string" }, "floor_struc": { "$ref": "#/$defs/FLOORSTRUCENUM", "description": "Refers to the structural elements and subfloor upon which the finish flooring is installed" }, "floor_thermal_mass": { "description": "The ability of the floor to provide inertia against temperature fluctuations", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_water_mold": { "$ref": "#/$defs/FLOORWATERMOLDENUM", "description": "Signs of the presence of mold or mildew in a room" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "freq_cook": { "description": "The number of times a meal is cooked per week", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "furniture": { "$ref": "#/$defs/FURNITUREENUM", "description": "The types of furniture present in the sampled room" }, "gender_restroom": { "$ref": "#/$defs/GENDERRESTROOMENUM", "description": "The gender type of the restroom" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hall_count": { "description": "The total count of hallways and corridors in the built structure", "type": "integer" }, "handidness": { "$ref": "#/$defs/HANDIDNESSENUM", "description": "The handedness of the individual sampled" }, "heat_cool_type": { "description": "Methods of conditioning or heating a room or building", "items": { "$ref": "#/$defs/HEATCOOLTYPEENUM" }, "type": "array" }, "heat_deliv_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The location of heat delivery within the room" }, "heat_sys_deliv_meth": { "$ref": "#/$defs/HEATSYSDELIVMETHENUM", "description": "The method by which the heat is delivered through the system" }, "heat_system_id": { "description": "The heating system identifier", "type": "integer" }, "height_carper_fiber": { "description": "The average carpet fiber height in the indoor environment", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "indoor_space": { "$ref": "#/$defs/INDOORSPACEENUM", "description": "A distinguishable space within a structure, the purpose for which discrete areas of a building is used" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "inside_lux": { "description": "The recorded value at sampling time (power density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "int_wall_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the wall at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "last_clean": { "description": "The last time the floor was cleaned (swept, mopped, vacuumed)", "format": "date-time", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_type": { "description": "Application of light to achieve some practical or aesthetic effect. Lighting includes the use of both artificial light sources such as lamps and light fixtures, as well as natural illumination by capturing daylight. 
Can also include absence of light", "items": { "$ref": "#/$defs/LIGHTTYPEENUM" }, "type": "array" }, "max_occup": { "description": "The maximum amount of people allowed in the indoor environment", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mech_struc": { "$ref": "#/$defs/MECHSTRUCENUM", "description": "mechanical structure: a moving structure" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "number_pets": { "description": "The number of pets residing in the sampled space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_plants": { "description": "The number of plant(s) in 
the sampling space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_resident": { "description": "The number of individuals currently occupying the sampling location", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "occup_density_samp": { "description": "Average number of occupants at time of sampling per square footage", "type": "number" }, "occup_document": { "$ref": "#/$defs/OCCUPDOCUMENTENUM", "description": "The type of documentation of occupancy" }, "occup_samp": { "description": "Number of occupants present at time of sample within the given space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "quad_pos": { "$ref": "#/$defs/QUADPOSENUM", "description": "The quadrant position of the sampling room within the building" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_air_humidity": { "description": "Partial vapor and air pressure, density of the vapor and air, or by the actual mass of the vapor and air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "rel_humidity_out": { "description": "The recorded outside relative humidity value at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_samp_loc": { "$ref": "#/$defs/RELSAMPLOCENUM", "description": "The sampling location within the train car" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "room_air_exch_rate": { "description": "The rate at which outside air replaces indoor air in a given space", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_architec_elem": { "description": "The unique details and component parts that, together, form the architecture of a distinguishable space within a built structure", "type": "string" }, "room_condt": { "$ref": "#/$defs/ROOMCONDTENUM", "description": "The condition of the room at the time of sampling" }, "room_connected": { "$ref": "#/$defs/ROOMCONNECTEDENUM", "description": "List of rooms connected to the sampling room by a doorway" }, "room_count": { "description": "The total count of rooms in the built structure including all room types", "type": "integer" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "room_door_dist": { "description": "Distance between doors (meters) in the hallway between the sampling room and adjacent rooms", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_door_share": { "description": "List of room(s) (room number, room name) sharing a door with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_hallway": { "description": "List of room(s) (room number, room name) located in the same hallway as sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_loc": { "$ref": "#/$defs/ROOMLOCENUM", "description": "The position of the room within the building" }, "room_moist_dam_hist": { "description": "The history of moisture damage or mold in the past 12 months. 
Number of events of moisture damage or mold observed", "type": "integer" }, "room_net_area": { "description": "The net floor area of sampling room. Net area excludes wall thicknesses", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_occup": { "description": "Count of room occupancy at time of sampling", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_samp_pos": { "$ref": "#/$defs/ROOMSAMPPOSENUM", "description": "The horizontal sampling position in the room relative to architectural elements" }, "room_type": { "description": "The main purpose or activity of the sampling room. A room is any distinguishable space within a structure", "type": "string" }, "room_vol": { "description": "Volume of sampling room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_wall_share": { "description": "List of room(s) (room number, room name) sharing a wall with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_window_count": { "description": "Number of windows in the room", "type": "integer" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_sort_meth": { "description": "Method by which samples are sorted; open face filter collecting total suspended particles, prefilter to remove particles larger than X micrometers in diameter, where common values of X would be 10 and 2.5 full size sorting in a cascade impactor", "items": { "type": "string" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_time_out": { "description": "The recent and long term history of outside sampling", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_weather": { "$ref": "#/$defs/SAMPWEATHERENUM", "description": "The weather on the sampling day" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. 
This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_use": { "$ref": "#/$defs/SEASONUSEENUM", "description": "The seasons the space is occupied" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "shad_dev_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the shading device" }, "shading_device_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the shading device at the time of sampling" }, "shading_device_loc": { "$ref": "#/$defs/SHADINGDEVICELOCENUM", "description": "The location of the shading device in relation to the built structure" }, "shading_device_mat": { "description": "The material the shading device is composed of", "type": "string" }, "shading_device_type": { "$ref": "#/$defs/SHADINGDEVICETYPEENUM", "description": "The type of shading device" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a 
material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "space_typ_state": { "$ref": "#/$defs/SPACETYPSTATEENUM", "description": "Customary or normal state of the space" }, "specific": { "$ref": "#/$defs/SPECIFICENUM", "description": "The building specifications. If design is chosen, indicate phase: conceptual, schematic, design development, construction documents" }, "specific_humidity": { "description": "The mass of water vapour in a unit mass of moist air, usually expressed as grams of vapour per kilogram of air, or, in air conditioning, as grains per pound", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "substructure_type": { "description": "The substructure or under building is that largely hidden section of the building which is built off the foundations to the ground floor level", "items": { "$ref": "#/$defs/SUBSTRUCTURETYPEENUM" }, "type": "array" }, "surf_air_cont": { "description": "Contaminant identified on surface", "items": { "$ref": "#/$defs/SURFAIRCONTENUM" }, "type": "array" }, "surf_humidity": { "description": "Surfaces: water activity as a function of air and material moisture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "surf_moisture": { "description": "Water held on a surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "surf_moisture_ph": { "description": "ph measurement of surface", "type": "number" }, "surf_temp": { "description": "Temperature of the surface at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp_out": { "description": "The recorded temperature value at sampling time outside", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "train_line": { "$ref": "#/$defs/TRAINLINEENUM", "description": "The subway line name" }, "train_stat_loc": { "$ref": "#/$defs/TRAINSTATLOCENUM", "description": "The train station collection location" }, "train_stop_loc": { "$ref": "#/$defs/TRAINSTOPLOCENUM", "description": "The train stop collection location" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "typ_occup_density": { "description": "Customary or normal density of occupants", "type": "number" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "vis_media": { "description": "The building visual media", "type": "string" }, "wall_area": { "description": "The total area of the sampled room's walls", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_const_type": { "$ref": "#/$defs/WALLCONSTTYPEENUM", "description": "The building class of the wall defined by the composition of the building elements and fire-resistance rating" }, "wall_finish_mat": { "$ref": "#/$defs/WALLFINISHMATENUM", "description": "The material utilized to finish the outer most layer of the wall" }, "wall_height": { "description": "The average height of the walls in the sampled room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the wall within the room" }, "wall_surf_treatment": { "$ref": "#/$defs/WALLSURFTREATMENTENUM", "description": "The surface treatment of interior wall" }, "wall_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a wall surface" }, "wall_thermal_mass": { "description": "The ability of the wall to provide inertia against temperature fluctuations. Generally this means concrete or concrete block that is either exposed or covered only with paint", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a wall" }, "water_feat_size": { "description": "The size of the water feature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_feat_type": { "$ref": "#/$defs/WATERFEATTYPEENUM", "description": "The type of water feature present within the building being sampled" }, "weekday": { "$ref": "#/$defs/WEEKDAYENUM", "description": "The day of the week when sampling occurred" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "window_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the window at the time of sampling" }, "window_cover": { "$ref": "#/$defs/WINDOWCOVERENUM", "description": "The type of window covering" }, "window_horiz_pos": { "$ref": "#/$defs/WINDOWHORIZPOSENUM", "description": "The horizontal position of the window on the wall" }, "window_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the window within the room" }, "window_mat": { "$ref": "#/$defs/WINDOWMATENUM", "description": "The type of material used to finish a window" }, "window_open_freq": { "description": "The number of times windows are opened per week", "type": "integer" }, "window_size": { "description": "The window's length and width", "type": "string" }, "window_status": { "$ref": "#/$defs/WINDOWSTATUSENUM", "description": "Defines whether the windows were open or closed during environmental testing" }, "window_type": { "$ref": "#/$defs/WINDOWTYPEENUM", "description": "The type of windows" }, "window_vert_pos": { "$ref": "#/$defs/WINDOWVERTPOSENUM", "description": "The vertical position of the window on the wall" }, "window_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the window" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { 
"description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "surf_material", "rel_air_humidity", "abs_air_humidity", "air_temp", "build_occup_type", "carb_dioxide", "ventilation_type", "organism_count", "indoor_space", "indoor_surf", "filter_type", "heat_cool_type", "building_setting", "light_type", "space_typ_state", "typ_occup_density", "occup_samp", "occup_density_samp" ], "title": "MisagBuiltEnvironment", "type": "object" }, "MisagFoodAnimalAndAnimalFeed": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the FoodAnimalAndAnimalFeed Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). 
Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products). An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "animal_am": { "description": "The name(s) (generic or brand) of the antimicrobial(s) given to the food animal within the last 30 days", "type": "string" }, "animal_am_dur": { "description": "The duration of time (days) that the antimicrobial was administered to the food animal", "type": "string" }, "animal_am_freq": { "description": "The frequency per day that the antimicrobial was administered to the food animal", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "animal_am_route": { "description": "The route by which the antimicrobial is administered into the body of the food animal", "type": "string" }, "animal_am_use": { "description": "The prescribed intended use of or the condition treated by the antimicrobial given to the food animal by any route of administration", "type": "string" }, "animal_body_cond": { "$ref": "#/$defs/ANIMALBODYCONDENUM", "description": "Body condition scoring is a production management tool used to evaluate overall health and nutritional needs of a food animal. Because there are different scoring systems, this field is restricted to three categories" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes |Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. 
This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_sex": { "$ref": "#/$defs/ANIMALSEXENUM", "description": "The sex and reproductive status of the food animal" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. 
kmer and coverage, or reference database and kmer", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. 
This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. 
http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. 
This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). 
Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality; this date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_source_age": { "description": "The age of the food source host organism. Depending on the type of host organism, age may be more appropriate to report in days, weeks, or years", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. 
The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": 
"string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "food_origin", "food_prod", "food_product_type", "IFSAC_category", "intended_consumer", "samp_purpose" ], "title": "MisagFoodAnimalAndAnimalFeed", "type": "object" }, "MisagFoodFarmEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the FoodFarmEnvironment Extension", "properties": { "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "air_flow_impede": { "description": "Presence of objects in the area that would influence or impede air flow through the air filter", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes |Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_intrusion": { "description": "Identification of animals intruding on the sample or sample site including invertebrates (such as pests or pollinators) and vertebrates (such as wildlife or domesticated animals). 
This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. 
For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination sofware, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "date_extr_weath": { "description": "Date of unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "format": "date-time", "type": "string" }, "type": "array" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO s biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen s local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extr_weather_event": { "description": "Unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "$ref": "#/$defs/EXTRWEATHEREVENTENUM" }, "type": "array" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_san_freq": { "description": "The number of times farm equipment is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually", "type": "string" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). 
Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fertilizer_date": { "description": "Date of administration of soil amendment or fertilizer. Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "format": "date-time", "type": "string" }, "food_clean_proc": { "$ref": "#/$defs/FOODCLEANPROCENUM", "description": "The process of cleaning food to separate other environmental materials from the food source. Multiple terms can be separated by pipes" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. 
The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, free-range, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. 
This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_medium": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). The name of the medium used to grow the microorganism", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product 
within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of 
any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. 
Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_part_maturity": { "description": "A description of the stage of development of a plant or plant part based on maturity or ripeness. This field accepts terms listed under degree of plant maturity (http://purl.obolibrary.org/obo/FOODON_03530050)", "type": "string" }, "plant_reprod_crop": { "description": "Plant reproductive part used in the field during planting to start the crop", "items": { "$ref": "#/$defs/PLANTREPRODCROPENUM" }, "type": "array" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, free-range, industrial, hormone-free, antibiotic free, cage free. 
Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. 
KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. 
It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. 
-80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. 
This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. 
Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of soil at time of sampling.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. 
This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_pH": { "description": "The pH of soil at time of sampling.", "type": "number" }, "soil_porosity": { "description": "Porosity of soil or deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_class": { "$ref": "#/$defs/SOILTEXTURECLASSENUM", "description": "One of the 12 soil texture classes use to describe soil texture based on the relative proportion of different grain sizes of mineral particles [sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um)] in a soil" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) 
should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "water_frequency": { "description": "Number of water delivery events within a given period of time", "type": "string" }, "water_pH": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_adjac": { "description": "Description of the environmental features that are adjacent to the farm water source. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "water_source_shared": { "description": "Other users sharing access to the same water source. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "depth", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "biotic_regm", "chem_administration", "food_product_type", "samp_type", "IFSAC_category" ], "title": "MisagFoodFarmEnvironment", "type": "object" }, "MisagFoodFoodProductionFacility": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the FoodFoodProductionFacility Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). 
Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "area_samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide_used": { "description": "Substance intended for preventing, neutralizing, destroying, repelling, or mitigating the effects of any pest or microorganism; that inhibits the growth, reproduction, and activity of organisms, including fungal cells; decreases the number of fungi or pests present; deters microbial growth and degradation of other ingredients in the formulation. Indicate the biocide used on the location where the sample was taken. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. 
This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%.
Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination sofware, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428.
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_monitoring_zone": { "description": "An environmental monitoring zone is a formal designation as part of an environmental monitoring program, in which areas of a food production facility are categorized, commonly as zones 1-4, based on likelihood or risk of foodborne pathogen contamination. This field accepts terms listed under food production environmental monitoring zone (http://purl.obolibrary.org/obo/ENVO).
Please add a term to indicate the environmental monitoring zone the sample was taken from", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "facility_type": { "description": "Establishment details about the type of facility where the sample was taken. This is independent of the specific product(s) within the facility", "items": { "$ref": "#/$defs/FACILITYTYPEENUM" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. 
This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. 
This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). 
If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, free-range, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. 
This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality; this date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hygienic_area": { "description": "The subdivision of areas within a food production facility according to hygienic requirements. This field accepts terms listed under hygienic food production area (http://purl.obolibrary.org/obo/ENVO). Please add a term that most accurately indicates the hygienic area your sample was taken from according to the definitions provided", "type": "string" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": "string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- 
*[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free. 
Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_loc_condition": { "$ref": "#/$defs/SAMPLOCCONDITIONENUM", "description": "The condition of the sample location at the time of sampling" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. 
Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. 
If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_surf_moisture": { "description": "Degree of water held on a sampled surface. If present, user can state the degree of water held on surface (intermittent moisture, submerged). If no surface moisture is present indicate not present", "items": { "$ref": "#/$defs/SAMPSURFMOISTUREENUM" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. 
Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. 
The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spec_intended_cons": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "ster_meth_samp_room": { "description": "The method used to sterilize the sampling room. This field accepts terms listed under electromagnetic radiation (http://purl.obolibrary.org/obo/ENVO_01001026). If the proper descriptor is not listed, please use text to describe the sampling room sterilization method. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", 
"pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "samp_source_mat_cat", "samp_type", "samp_stor_media", "samp_stor_device", "food_product_type", "IFSAC_category", "food_product_qual", "food_contact_surf" ], "title": "MisagFoodFoodProductionFacility", "type": "object" }, "MisagFoodHumanFoods": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the FoodHumanFoods Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). 
Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products). An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions.
Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. 
This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection.
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e.
kmer and coverage, or reference database and kmer", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). 
Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen.
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_chem_add": { "description": "Any chemicals that are added to the fermentation process to achieve the desired final product", "items": { "type": "string" }, "type": "array" }, "ferm_chem_add_perc": { "description": "The amount of chemical added to the fermentation process", "items": { "type": "number" }, "type": "array" }, "ferm_headspace_oxy": { "description": "The amount of headspace oxygen in a fermentation vessel", "type": "number" }, "ferm_medium": { "description": "The growth medium used for the fermented food fermentation process, which supplies the required nutrients. Usually this includes a carbon and nitrogen source, water, micronutrients and chemical additives", "type": "string" }, "ferm_pH": { "description": "The pH of the fermented food fermentation process", "type": "number" }, "ferm_rel_humidity": { "description": "The relative humidity of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_temp": { "description": "The temperature of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_time": { "description": "The time duration of the fermented food fermentation process", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "ferm_vessel": { "description": "The type of vessel used for containment of the fermentation", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. 
This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that are not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction.
This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). 
Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. 
This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality; this date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required.
The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g.
specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready 
libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "microb_start": { "description": "Any type of microorganisms used in food production. This field accepts terms listed under live organisms for food production (http://purl.obolibrary.org/obo/FOODON_0344453)", "type": "string" }, "microb_start_count": { "description": "Total cell count of starter culture per gram, volume or area of sample and the method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example : total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "microb_start_inoc": { "description": "The amount of starter culture used to inoculate a new batch", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "microb_start_prep": { "description": "Information about the protocol or method used to prepare the starter inoculum", "type": "string" }, "microb_start_source": { "description": "The source from which the microbial starter culture was sourced. If commercially supplied, list supplier", "type": "string" }, "microb_start_taxID": { "description": "Please include Genus species and strain ID, if known of microorganisms used in food production. For complex communities, pipes can be used to separate two or more microbes", "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material is performed", "items": { "type": "string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": 
"string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "coll_site_geo_feat", "food_product_type", "IFSAC_category" ], "title": "MisagFoodHumanFoods", "type": "object" }, "MisagHostAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the HostAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. 
A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "blood_press_diast": { "description": "Resting diastolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "blood_press_syst": { "description": "Resting systolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. 
kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. 
specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 
9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MisagHostAssociated", "type": "object" }, "MisagHumanAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the HumanAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amniotic_fluid_color": { "description": "Specification of the color of the amniotic fluid sample", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "blood_blood_disord": { "description": "History of blood disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, hematopoietic system disease (https://disease-ontology.org/?id=DOID:74)", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. 
For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diet_last_six_month": { "description": "Specification of major diet changes in the last six months, if yes the change should be specified", "type": "string" }, "drug_usage": { "description": "Any drug used by subject and the frequency of usage; can include multiple drugs used", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "foetal_health_stat": { "description": "Specification of foetal health status, should also include abortion", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gestation_state": { "description": "Specification of the gestation state", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. 
Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_hiv_stat": { "description": "HIV status of subject, if yes HAART initiation status should also be indicated as [YES or NO]", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "kidney_disord": { "description": "History of kidney disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, kidney disease (https://disease-ontology.org/?id=DOID:557)", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "maternal_health_stat": { "description": "Specification of the maternal health status", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_throat_disord": { "description": "History of nose-throat disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850), upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. 
The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pet_farm_animal": { "description": "Specification of presence of pets or farm animals in the environment of subject, if yes the animals should be specified; can include multiple animals present", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "pulmonary_disord": { "description": "History of pulmonary disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850)", "items": { "type": "string" }, "type": "array" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "smoker": { "description": "Specification of smoking status", "type": "boolean" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "study_complt_stat": { "description": "Specification of study completion status, if no the reason should be specified", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "travel_out_six_month": { "description": "Specification of the countries travelled in the last six months; can include multiple travels", "items": { "type": "string" }, "type": "array" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "twin_sibling": { "description": "Specification of twin sibling presence", "type": "boolean" }, "urine_collect_meth": { "$ref": "#/$defs/URINECOLLECTMETHENUM", "description": "Specification of urine collection method" }, "urogenit_tract_disor": { "description": "History of urogenital tract disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" }, "weight_loss_3_month": { "description": "Specification of weight loss in the last three months, if yes should be further specified to include amount of weight loss", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MisagHumanAssociated", "type": "object" }, "MisagHumanGut": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the HumanGut Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gastrointest_disord": { "description": "History of gastrointestinal tract disorders; can include multiple disorders. History of blood disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, gastrointestinal system disease (https://disease-ontology.org/?id=DOID:77)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "liver_disord": { "description": "History of liver disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, liver disease (https://disease-ontology.org/?id=DOID:409)", "items": { "type": "string" }, "type": "array" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored, written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "special_diet": { "description": "Specification of special diet; can include multiple special diets", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", 
"collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MisagHumanGut", "type": "object" }, "MisagHumanOral": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the HumanOral Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling >= 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for: High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. 
kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung, etc.). 
Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_mouth_teeth_throat_disord": { "description": "History of nose/mouth/teeth/throat disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, nose disease (https://disease-ontology.org/?id=DOID:2825), mouth disease (https://disease-ontology.org/?id=DOID:403), tooth disease (https://disease-ontology.org/?id=DOID:1091), or upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored, written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_last_toothbrush": { "description": "Specification of the time since last toothbrushing", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", 
"contam_score", "compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MisagHumanOral", "type": "object" }, "MisagHumanSkin": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the HumanSkin Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. 
Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO 8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e.
kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dermatology_disord": { "description": "History of dermatology disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, skin disease (https://disease-ontology.org/?id=DOID:37)", "items": { "type": "string" }, "type": "array" }, "dominant_hand": { "$ref": "#/$defs/DOMINANTHANDENUM", "description": "Dominant hand of the subject" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428.
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area.
https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. 
Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung, etc.). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g.
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degrees Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_since_last_wash": { "description": "Specification of the time since last wash", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", 
"compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MisagHumanSkin", "type": "object" }, "MisagHumanVaginal": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the HumanVaginal Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. 
Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "birth_control": { "description": "Specification of birth control medication used", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. 
In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO 8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e.
kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "douche": { "description": "Date of most recent douche", "format": "date-time", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. 
We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gynecologic_disord": { "description": "History of gynecological disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, female reproductive system disease (https://disease-ontology.org/?id=DOID:229)", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. 
Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hrt": { "description": "Whether subject had hormone replacement therapy, and if yes start date", "format": "date-time", "type": "string" }, "hysterectomy": { "description": "Specification of whether hysterectomy was performed", "type": "boolean" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "menarche": { "description": "Date of most recent menstruation", "format": "date-time", "type": "string" }, "menopause": { "description": "Date of onset of menopause", "format": "date-time", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pregnancy": { "description": "Date due of pregnancy", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sexual_act": { "description": "Current sexual partner and frequency of sex", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "urogenit_disord": { "description": "History of urogenital disorders, can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, reproductive system disease (https://disease-ontology.org/?id=DOID:15) or urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MisagHumanVaginal", "type": "object" }, "MisagHydrocarbonResourcesCores": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the HydrocarbonResourcesCores Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. 
Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. 
In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. 
kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. Mic)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. 
Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. (Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "owc_tvdss": { "description": "Depth of the original oil water contact (OWC) zone (average) (m TVDSS)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "permeability": { "description": "Measure of the ability of a hydrocarbon resource to allow fluids to pass through it. (Additional information: https://en.wikipedia.org/wiki/Permeability_(earth_sciences))", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. 
More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_md": { "description": "In non deviated well, measured depth is equal to the true vertical depth, TVD (TVD=TVDSS plus the reference or datum it refers to). In deviated wells, the MD is the length of trajectory of the borehole measured from the same reference or datum. Common datums used are ground level (GL), drilling rig floor (DF), rotary table (RT), kelly bushing (KB) and mean sea level (MSL). If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. 
It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. 
Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_tvdss": { "description": "Depth of the sample i.e. The vertical distance between the sea level and the sampled position in the subsurface. Depth can be reported as an interval for subsurface samples e.g. 1325.75-1362.25 m", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sr_dep_env": { "$ref": "#/$defs/SRDEPENVENUM", "description": "Source rock depositional environment (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of source rock (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_kerog_type": { "$ref": "#/$defs/SRKEROGTYPEENUM", "description": "Origin of kerogen. 
Type I: Algal (aquatic), Type II: planktonic and soft plant material (aquatic or terrestrial), Type III: terrestrial woody/ fibrous plant material (terrestrial), Type IV: oxidized recycled woody debris (terrestrial) (additional information: https://en.wikipedia.org/wiki/Kerogen). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_lithology": { "$ref": "#/$defs/SRLITHOLOGYENUM", "description": "Lithology of source rock (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 
3.5 cp; 100 C)", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "hcr", "hc_produced", "basin", "hcr_temp", "sulfate_fw", "vfa_fw", "samp_type", "api" ], "title": "MisagHydrocarbonResourcesCores", "type": "object" }, "MisagHydrocarbonResourcesFluidsSwabs": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the HydrocarbonResourcesFluidsSwabs Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "add_recov_method": { "description": "Additional (i.e. Secondary, tertiary, etc.) recovery methods deployed for increase of hydrocarbon recovery from resource and start date for each one of them. If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biocide": { "description": "List of biocides (commercial name of product and supplier) and date of administration", "type": "string" }, "biocide_admin_method": { "description": "Method of biocide administration (dose, frequency, duration, time elapsed between last biociding and sampling) (e.g. 150 mg/l; weekly; 4 hr; 3 days)", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_treat_method": { "description": "Method of chemical administration(dose, frequency, duration, time elapsed between administration and sampling) (e.g. 
50 mg/l; twice a week; 1 hr; 0 days)", "type": "string" }, "chem_treatment": { "description": "List of chemical compounds administered upstream the sampling location where sampling occurred (e.g. Glycols, H2S scavenger, corrosion and scale inhibitors, demulsifiers, and other production chemicals etc.). The commercial name of the product and name of the supplier should be provided. The date of administration should also be included", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. 
checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination sofware, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. MIC)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level.
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS .
Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "iw_bt_date_well": { "description": "Injection water breakthrough date per well following a secondary and/or tertiary recovery", "format": "date-time", "type": "string" }, "iwf": { "description": "Proportion of the produced fluids derived from injected water at the time of sampling. (e.g. 87%)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling 
conditions should also be provided. (Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (soure: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_rate": { "description": "Oil and/or gas production rates per well (e.g. 524 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_start_date": { "description": "Date of field's first production", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. 
The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_point": { "$ref": "#/$defs/SAMPCOLLECTPOINTENUM", "description": "Sampling point on the asset where the sample was collected (e.g. Wellhead, storage tank, separator, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_loc_corr_rate": { "description": "Metal corrosion rate is the speed of metal deterioration due to environmental conditions. As environmental conditions change corrosion rates change accordingly.
Therefore, long term corrosion rates are generally more informative than short term rates and for that reason they are preferred during reporting. In the case of suspected MIC, corrosion rate measurements at the time of sampling might provide insights into the involvement of certain microbial community members in MIC as well as potential microbial interplays", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_preserv": { "description": "Preservative added to the sample (e.g. Rnalater, alcohol, formaldehyde, etc.). Where appropriate include volume added (e.g. Rnalater; 2 ml)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material.
This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. 
ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "water_cut": { "description": "Current amount of water (%) in a produced fluid stream; or the average of the combined streams", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_prod_rate": { "description": "Water production rates per well (e.g. 987 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "temp", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "hcr", "hc_produced", "basin", "water_cut", "iwf", "add_recov_method", "samp_type", "samp_collect_point", "sulfate", "sulfide", "nitrate", "api" ], "title": "MisagHydrocarbonResourcesFluidsSwabs", "type": "object" }, "MisagMicrobialMatBiofilm": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the MicrobialMatBiofilm Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. 
For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? 
Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MisagMicrobialMatBiofilm", "type": "object" }, "MisagMiscellaneousNaturalOrArtificialEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the MiscellaneousNaturalOrArtificialEnvironment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling >= 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. 
checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . 
Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MisagMiscellaneousNaturalOrArtificialEnvironment", "type": "object" }, "MisagPlantAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the PlantAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp_regm": { "description": "Information about treatment involving an exposure to varying temperatures; should include the temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include different temperature regimens", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. 
A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "antibiotic_regm": { "description": "Information about treatment involving antibiotic administration; should include the name of antibiotic, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple antibiotic regimens", "items": { "type": "string" }, "type": "array" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. 
High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_mutagen": { "description": "Treatment involving use of mutagens; should include the name of mutagen, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mutagen regimens", "items": { "type": "string" }, "type": "array" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination sofware, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "cult_root_med": { "description": "Name or reference for the hydroponic or in vitro culture rooting medium; can be the name of a commonly used medium or reference to a specific medium, e.g. Murashige and Skoog medium. If the medium has not been formally published, use the rooting medium descriptors", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483).
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_regm": { "description": "Information about treatment involving the use of fertilizers; should include the name of fertilizer, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fertilizer regimens", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name 
of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravity": { "description": "Information about treatment involving use of gravity factor to study various types of responses in presence, absence or modified levels of gravity; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple treatments", "items": { "type": "string" }, "type": "array" }, "growth_facil": { "description": "Type of facility where the sampled plant was grown; controlled vocabulary: growth chamber, open top chamber, glasshouse, experimental garden, field. 
Alternatively use Crop Ontology (CO) terms, see http://www.cropontology.org/ontology/CO_715/Crop%20Research", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_hormone_regm": { "description": "Information about treatment involving use of growth hormones; should include the name of growth hormone, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple growth hormone regimens", "items": { "type": "string" }, "type": "array" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; information about treatment involving use of growth hormones; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_wet_mass": { "description": "Measurement of wet mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity_regm": { "description": "Information about treatment involving an exposure to varying degree of humidity; information about treatment involving use of growth hormones; should include amount of humidity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_regm": { "description": "Information about treatment(s) involving exposure to light, including both light intensity and quality", "type": "string" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "mineral_nutr_regm": { "description": "Information about treatment involving the use of mineral supplements; should include the name of mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of ph of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimen", "items": { "type": "string" }, "type": "array" }, 
"plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_sex": { "$ref": "#/$defs/PLANTSEXENUM", "description": "Sex of the reproductive parts on the whole plant, e.g. pistillate, staminate, monoecieous, hermaphrodite" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "radiation_regm": { "description": "Information about treatment involving exposure of plant or a plant part to a particular radiation regimen; should include the radiation type, amount or intensity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple radiation regimens", "items": { "type": "string" }, "type": "array" }, "rainfall_regm": { "description": "Information about treatment involving an exposure to a given amount of rainfall, treatment 
regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N, P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "root_med_regl": { "description": "Growth regulators in the culture rooting medium such as cytokinins, auxins, gibberellins, abscisic acid; e.g.
0.5 mg/L NAA", "type": "string" }, "root_med_solid": { "description": "Specification of the solidifying agent in the culture rooting medium; e.g. agar", "type": "string" }, "root_med_suppl": { "description": "Organic supplements of the culture rooting medium, such as vitamins, amino acids, organic acids, antibiotics, activated charcoal; e.g. nicotinic acid (0.5 mg/L)", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO).
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g.
Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. 
However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tiss_cult_growth_med": { "description": "Description of plant tissue culture growth media used", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": 
"string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MisagPlantAssociated", "type": "object" }, "MisagSediment": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the Sediment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. 
High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483).
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "particle_class": { "description": "Particles are classified, based on their size, into six general categories:clay, silt, sand, gravel, cobbles, and boulders; should include amount of particle preceded by the name of the particle type; can include multiple values", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored, written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "sediment_type": { "$ref": "#/$defs/SEDIMENTTYPEENUM", "description": "Information about the sediment type based on major constituents" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "depth", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MisagSediment", "type": "object" }, "MisagSoil": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the Soil Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "al_sat": { "description": "Aluminum saturation (esp. For tropical soils)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "al_sat_meth": { "description": "Reference or method used in determining Al saturation", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. 
For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for: High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e.
kmer and coverage, or reference database and kmer", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. 
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner.
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals": { "description": "Heavy metals present in the sequenced sample and their concentrations. 
For multiple heavy metals and concentrations, add multiple copies of this field", "items": { "type": "string" }, "type": "array" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "link_addit_analys": { "description": "Link to additional analysis results performed on the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", 
"type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication",
"type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "water_cont_soil_meth": { "description": "Reference or method used in determining the water content of soil", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "depth", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "elev", "env_broad_scale" ], "title": "MisagSoil", "type": "object" }, "MisagSymbiontAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist 
and the SymbiontAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "association_duration": { "description": "Time spent in host of the symbiotic organism at the time of sampling; relevant scale depends on symbiotic organism and study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for: High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. 
kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_cellular_loc": { "$ref": "#/$defs/HOSTCELLULARLOCENUM", "description": "The localization of the symbiotic host organism within the host from which it was sampled: e.g. 
intracellular if the symbiotic host organism is localized within the cells or extracellular if the symbiotic host organism is localized outside of cells" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_dependence": { "$ref": "#/$defs/HOSTDEPENDENCEENUM", "description": "Type of host dependence for the symbiotic host organism to its host" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_infra_spec_name": { "description": "Taxonomic information about the host below subspecies level", "type": "string" }, "host_infra_spec_rank": { "description": "Taxonomic rank information about the host below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_number": { "description": "Number of symbiotic host individuals pooled at the time of collection", "type": "string" }, "host_of_host_coinf": { "description": "The taxonomic name of any coinfecting organism observed in a symbiotic relationship with the host of the sampled host organism. e.g. where a sample collected from a host trematode species (A) which was collected from a host_of_host fish (B) that was also infected with a nematode (C), the value here would be (C) the nematode {species name} or {common name}. Multiple co-infecting species may be added in a comma-separated list. For listing symbiotic organisms associated with the host (A) use the term Observed host symbiont", "type": "string" }, "host_of_host_disease": { "description": "List of diseases with which the host of the symbiotic host organism has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_loc": { "description": "For a symbiotic host organism the local anatomical environment within its host may have causal influences. Report the anatomical entity(s) which are in the direct environment of the symbiotic host organism being sampled and which you believe have significant causal influences on your sample or specimen. 
For example, if the symbiotic host organism being sampled is an intestinal worm, its local environmental context will be the term for intestine from UBERON (http://uberon.github.io/)", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_med": { "description": "Report the environmental material(s) immediately surrounding the symbiotic host organism at the time of sampling. This usually will be a tissue or substance type from the host, but may be another material if the symbiont is external to the host. We recommend using classes from the UBERON ontology, but subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483) may also be used. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
intestines, heart)", "type": "string" }, "host_of_host_fam_rel": { "description": "Familial relationship of the host of the symbiotic host organisms to other hosts of symbiotic host organism in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_of_host_geno": { "description": "Observed genotype of the host of the symbiotic host organism", "type": "string" }, "host_of_host_gravid": { "description": "Whether or not the host of the symbiotic host organism is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_of_host_infname": { "description": "Taxonomic name information of the host of the symbiotic host organism below subspecies level", "type": "string" }, "host_of_host_infrank": { "description": "Taxonomic rank information about the host of the symbiotic host organism below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_of_host_name": { "description": "Common name of the host of the symbiotic host organism", "type": "string" }, "host_of_host_pheno": { "description": "Phenotype of the host of the symbiotic host organism. For phenotypic quality ontology (PATO) terms, see http://purl.bioontology.org/ontology/pato", "type": "string" }, "host_of_host_sub_id": { "description": "A unique identifier by which each host of the symbiotic host organism subject can be referred to, de-identified, e.g. #H14", "type": "string" }, "host_of_host_taxid": { "description": "NCBI taxon id of the host of the symbiotic host organism", "type": "string" }, "host_of_host_totmass": { "description": "Total mass of the host of the symbiotic host organism at collection, the unit depends on the host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_specificity": { "$ref": "#/$defs/HOSTSPECIFICITYENUM", "description": "Level of specificity of symbiont-host interaction: e.g. generalist (symbiont able to establish associations with distantly related hosts) or species-specific" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "mode_transmission": { "$ref": "#/$defs/MODETRANSMISSIONENUM", "description": "The process through which the symbiotic host organism entered the host from which it was sampled" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { 
"description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "route_transmission": { "$ref": "#/$defs/ROUTETRANSMISSIONENUM", "description": "Description of path taken by the symbiotic host organism being sampled in order to establish a symbiotic relationship with the host (with which it was observed at the time of sampling) via a mode of transmission (specified in mode_transmission)" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_sol": { "description": "Solution within which sample was stored, if any", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sym_life_cycle_type": { "$ref": "#/$defs/SYMLIFECYCLETYPEENUM", "description": "Type of life cycle of the symbiotic host species (the thing being sampled). Simple life cycles occur within a single host, complex ones within multiple different hosts over the course of their normal life cycle" }, "symbiont_host_role": { "$ref": "#/$defs/SYMBIONTHOSTROLEENUM", "description": "Role of the host in the life cycle of the symbiotic organism" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "type_of_symbiosis": { "$ref": "#/$defs/TYPEOFSYMBIOSISENUM", "description": "Type of biological interaction established between the symbiotic host organism being sampled and its respective host" }, "urobiom_sex": { "$ref": "#/$defs/UROBIOMSEXENUM", "description": "Physical sex of the host" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "host_dependence", "sym_life_cycle_type", "host_life_stage" ], "title": "MisagSymbiontAssociated", "type": "object" }, "MisagWastewaterSludge": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the WastewaterSludge Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the 
sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "biochem_oxygen_dem": { "description": "Amount of dissolved oxygen needed by aerobic biological organisms in a body of water to break down organic material present in a given water sample at certain temperature over a specific time period", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_oxygen_dem": { "description": "A measure of the capacity of water to consume oxygen during the decomposition of organic matter and the oxidation of inorganic chemicals such as ammonia and nitrite", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. 
The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "efficiency_percent": { "description": "Percentage of volatile solids removed from the anaerobic digestor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "emulsions": { "description": "Amount or concentration of substances such as paints, adhesives, mayonnaise, hair colorants, emulsified oils, etc.; can include multiple emulsion types", "items": { "type": "string" }, "type": "array" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. 
air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gaseous_substances": { "description": "Amount or concentration of substances such as hydrogen sulfide, carbon dioxide, methane, etc.; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "indust_eff_percent": { "description": "Percentage of industrial effluents received by wastewater treatment plant", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "inorg_particles": { "description": "Concentration of particles such as sand, grit, metal particles, ceramics, etc.; can include multiple particles", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_particles": { "description": "Concentration of particles such as faeces, hairs, food, vomit, paper fibers, plant material, humus, etc", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pre_treatment": { "description": "The process of pre-treatment removes materials that can be easily collected from the raw wastewater", "type": "string" }, "primary_treatment": { "description": "The process to produce both a generally homogeneous liquid capable of being treated biologically and a sludge that can be separately treated or processed", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reactor_type": { "description": "Anaerobic digesters can be designed and engineered to operate using a number of different process configurations, as batch or continuous, mesophilic, high solid or low solid, and single stage or multistage", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) 
used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. 
It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "secondary_treatment": { "description": "The process for substantially degrading the biological content of the sewage", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sewage_type": { "description": "Type of wastewater treatment plant as municipal or industrial", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sludge_retent_time": { "description": "The time activated sludge remains in reactor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_inorg_mat": { "description": "Concentration of substances such as ammonia, road-salt, sea-salt, cyanide, hydrogen sulfide, thiocyanates, thiosulfates, etc", "items": { "type": "string" }, "type": "array" }, "soluble_org_mat": { "description": "Concentration of substances such as urea, fruit sugars, soluble proteins, drugs, pharmaceuticals, etc", "items": { "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tertiary_treatment": { "description": "The process providing a final treatment stage to raise the effluent quality before it is discharged to the receiving environment", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "wastewater_type": { "description": "The origin of wastewater such as human waste, rainfall, storm drains, etc", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MisagWastewaterSludge", "type": "object" }, "MisagWater": { "additionalProperties": false, "description": "MIxS data that complies with the Misag checklist and the Water Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "atmospheric_data": { "description": "Measurement of atmospheric data; can include multiple data", "items": { "type": "string" }, "type": "array" }, "bac_prod": { "description": "Bacterial production in the water column measured by isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bac_resp": { "description": "Measurement of bacterial respiration in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "contam_score": { "description": "The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases", "type": "number" }, "contam_screen_input": { "$ref": "#/$defs/CONTAMSCREENINPUTENUM", "description": "The type of sequence data used as input" }, "contam_screen_param": { "description": "Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. 
Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer", "type": "string" }, "decontam_software": { "description": "Tool(s) used in contamination screening", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_nitro": { "description": "Concentration of dissolved inorganic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "down_par": { "description": "Visible waveband radiance and irradiance measurements in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . 
Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fluor": { "description": "Raw or converted fluorescence of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_intensity": { "description": "Measurement of light intensity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "part_org_nitro": { "description": "Concentration of particulate organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "photon_flux": { "description": "Measurement of photon flux", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "primary_prod": { "description": "Measurement of primary production, generally measured as isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_to_oxygen": { "$ref": "#/$defs/RELTOOXYGENENUM", "description": "Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_react_phosp": { "description": "Concentration of soluble reactive phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_part_matter": { "description": "Concentration of suspended particulate matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_diss_nitro": { "description": "Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_inorg_nitro": { "description": "Total inorganic nitrogen content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. 
Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_part_carb": { "description": "Total particulate carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "x16s_recover": { "description": "Can a 16S gene be recovered from the submitted SAG or MAG?", "type": "boolean" }, "x16s_recover_software": { "description": "Tools used for 16S rRNA gene extraction", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "compl_score", "assembly_qual", "project_name", "assembly_software", "tax_ident", "contam_score", "compl_software", "env_local_scale", "sort_tech", "depth", "wga_amp_appr", "env_medium", "samp_taxon_id", "geo_loc_name", "sc_lysis_approach", "collection_date", "seq_meth", "lat_lon", "env_broad_scale" ], "title": "MisagWater", "type": "object" }, "MiscellaneousNaturalOrArtificialEnvironment": { "additionalProperties": false, "description": "miscellaneous natural or artificial environment extension", "properties": { "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name" ], "title": "MiscellaneousNaturalOrArtificialEnvironment", "type": "object" }, "MiuvigAgriculture": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the Agriculture Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. 
- amount and time of applications", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chimera_check": { "description": "Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. 
A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. 
checkm, anvi'o, busco", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. 
The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can including type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). 
Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). 
If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; information about treatment involving use of growth hormones; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "isol_growth_condt": { "description": "Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil 
classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the 
cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUS defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pcr_cond": { "description": "Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'", "type": "string" }, "pcr_primers": { "description": "PCR primers that were used to amplify the sequence of the targeted gene, locus or subfragment. This field should contain all the primers used for a single PCR reaction if multiple forward or reverse primers are present in a single PCR reaction. The primer sequence should be reported in uppercase letters", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of ph of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimen", "items": { "type": "string" }, "type": "array" }, "photosynt_activ": { "description": "Measurement of photosynthetic activity (i.e. leaf gas exchange / chlorophyll fluorescence emissions / reflectance / transpiration) Please also include the term method term detailing the method of activity measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "photosynt_activ_meth": { "description": "Reference or method used in measurement of photosynthetic activity", "items": { "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) 
used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. 
This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "seq_quality_check": { "$ref": "#/$defs/SEQQUALITYCHECKENUM", "description": "Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). 
Applied only for sequences that are not submitted to SRA,ENA or DRA" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. 
The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of some soil.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_pH": { "description": "pH of some soil.", "type": "number" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. 
The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "target_gene": { "description": "Targeted gene or locus name for marker gene studies", "type": "string" }, "target_subfragment": { "description": "Name of subfragment of a gene or locus. Important to e.g. 
identify special regions on marker genes like V6 on 16S rRNA", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "trophic_level": { "$ref": "#/$defs/TROPHICLEVELENUM", "description": "Trophic levels are the feeding position in a food chain. Microbes can be a range of producers (e.g. chemolithotroph)" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "water_pH": { "description": "The pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_shared": { "description": "Other users sharing access to the same water source. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" } }, "required": [ "samp_name", "lib_screen", "source_uvig", "nucl_acid_amp", "mid", "assembly_name", "temp", "nucl_acid_ext", "samp_size", "lib_reads_seqd", "samp_collect_device", "number_contig", "assembly_qual", "project_name", "lib_vector", "virus_enrich_appr", "adapters", "assembly_software", "env_local_scale", "samp_mat_process", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type", "chem_administration", "food_source", "samp_store_dur", "samp_store_loc", "host_age", "host_common_name", "host_genotype", "host_height", "host_length", "host_life_stage", "host_phenotype", "host_taxid", "host_tot_mass", "pcr_primers", "target_gene", "target_subfragment", "pcr_cond", "chimera_check", "soil_type", "soil_type_meth", "store_cond", "microbial_biomass", "micro_biomass_meth", "sieving", 
"pool_dna_extracts" ], "title": "MiuvigAgriculture", "type": "object" }, "MiuvigAir": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the Air Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. 
High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "barometric_press": { "description": "Force per unit area exerted against a surface by the weight of air above that surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_monoxide": { "description": "Carbon monoxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. 
Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUS defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "oxygen": { "description": "Oxygen (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pollutants": { "description": "Pollutant types and, amount or concentrations measured at the time of sampling; can report multiple pollutants by entering numeric values preceded by name of pollutant", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. 
More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. 
The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "volatile_org_comp": { "description": "Concentration of carbon-based chemicals that easily evaporate at room temperature; can report multiple volatile organic compounds by entering numeric values preceded by name of compound", "items": { "type": "string" }, "type": "array" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "source_uvig", "alt", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type" ], "title": "MiuvigAir", "type": "object" }, "MiuvigBuiltEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the BuiltEnvironment Extension", "properties": { "abs_air_humidity": { "description": "Actual mass of water vapor - mh20 - present in the air water vapor mixture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "address": { "description": "The street name and building number where the sampling occurred", "type": "string" }, "adj_room": { "description": "List of rooms (room number, room name) immediately adjacent to the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "aero_struc": { "$ref": "#/$defs/AEROSTRUCENUM", "description": "Aerospace structures typically consist of thin plates with stiffeners for the external surfaces, bulkheads and frames to support the shape and fasteners such as welds, rivets, screws and bolts to hold the components together" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amount_light": { "description": "The unit of illuminance and luminous emittance, measuring luminous flux per unit area", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "arch_struc": { "$ref": "#/$defs/ARCHSTRUCENUM", "description": "An architectural structure is a human-made, free-standing, immobile outdoor construction" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. 
Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_dew_point": { "description": "The average of dew point measures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "avg_occup": { "description": "Daily average occupancy of room. 
Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "avg_temp": { "description": "The average of temperatures taken at the beginning of every hour over a 24 hour period on the sampling day", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bathroom_count": { "description": "The number of bathrooms in the building", "type": "integer" }, "bedroom_count": { "description": "The number of bedrooms in the building", "type": "integer" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "build_docs": { "$ref": "#/$defs/BUILDDOCSENUM", "description": "The building design, construction and operation documents" }, "build_occup_type": { "description": "The primary function for which a building or discrete part of a building is intended to be used", "items": { "$ref": "#/$defs/BUILDOCCUPTYPEENUM" }, "type": "array" }, "building_setting": { "$ref": "#/$defs/BUILDINGSETTINGENUM", "description": "A location (geography) where a building is set" }, "built_struc_age": { "description": "The age of the built structure since construction", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "built_struc_set": { "$ref": "#/$defs/BUILTSTRUCSETENUM", "description": "The characterization of the location of the built structure as high or low human density" }, "built_struc_type": { "description": "A physical structure that is a body or assemblage of bodies in space to form a system capable of supporting loads", "type": "string" }, "carb_dioxide": { "description": "Carbon dioxide (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_area": { "description": "The area of the ceiling space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the ceiling at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "ceil_finish_mat": { "$ref": "#/$defs/CEILFINISHMATENUM", "description": "The type of material used to finish a ceiling" }, "ceil_struc": { "$ref": "#/$defs/CEILSTRUCENUM", "description": "The construction format of the ceiling" }, "ceil_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a ceiling surface" }, "ceil_thermal_mass": { "description": "The ability of the ceiling to provide inertia against temperature fluctuations. Generally this means concrete that is exposed. A metal deck that supports a concrete slab will act thermally as long as it is exposed to room air flow", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ceil_type": { "$ref": "#/$defs/CEILTYPEENUM", "description": "The type of ceiling according to the ceiling's appearance or construction" }, "ceil_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the ceiling" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cool_syst_id": { "description": "The cooling system identifier", "type": "integer" }, "date_last_rain": { "description": "The date of the last time it rained", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "dew_point": { "description": "The temperature to which a given parcel of humid air must be cooled, at constant barometric pressure, for water vapor to condense into water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_comp_type": { "$ref": "#/$defs/DOORCOMPTYPEENUM", "description": "The composite type of the door" }, "door_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the door" }, "door_direct": { "$ref": "#/$defs/DOORDIRECTENUM", "description": "The direction the door opens" }, "door_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the door in the room" }, "door_mat": { "$ref": "#/$defs/DOORMATENUM", "description": "The material the door is composed of" }, "door_move": { "$ref": "#/$defs/DOORMOVEENUM", "description": "The type of movement of the door" }, "door_size": { "description": "The size of the door", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "door_type": { "$ref": "#/$defs/DOORTYPEENUM", "description": "The type of door material" }, "door_type_metal": { "$ref": "#/$defs/DOORTYPEMETALENUM", "description": "The type of metal door" }, "door_type_wood": { "description": "The type of wood door", "type": "string" }, "door_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a door" }, "drawings": { "$ref": "#/$defs/DRAWINGSENUM", "description": "The building's architectural drawings; if design is chosen, indicate phase-conceptual, schematic, design development, and construction documents" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elevator": { "description": "The number of elevators within the built structure", "type": "integer" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "escalator": { "description": "The number of escalators within the built structure", "type": "integer" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "exp_duct": { "description": "The amount of exposed ductwork in the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "exp_pipe": { "description": "The number of exposed pipes in the room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "ext_door": { "description": "The number of exterior doors in the built structure", "type": "integer" }, "ext_wall_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The orientation of the exterior wall" }, "ext_window_orient": { "$ref": "#/$defs/SHAREDENUM0", "description": "The compass direction the exterior window of the room is facing" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "filter_type": { "description": "A device which removes solid particulates or airborne molecular contaminants", "items": { "$ref": "#/$defs/FILTERTYPEENUM" }, "type": "array" }, "fireplace_type": { "$ref": "#/$defs/FIREPLACETYPEENUM", "description": "A firebox with chimney" }, "floor_age": { "description": "The time period since installment of the carpet or flooring", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_area": { "description": "The area of the floor space within the room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the floor at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "floor_count": { "description": "The number of floors in the building, including basements and mechanical penthouse", "type": "integer" }, "floor_finish_mat": { "description": "The floor covering type; the finished surface that is walked on", "type": "string" }, "floor_struc": { "$ref": "#/$defs/FLOORSTRUCENUM", "description": "Refers to the structural elements and subfloor upon which the finish flooring is installed" }, "floor_thermal_mass": { "description": "The ability of the floor to provide inertia against temperature fluctuations", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "floor_water_mold": { "$ref": "#/$defs/FLOORWATERMOLDENUM", "description": "Signs of the presence of mold or mildew in a room" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "freq_cook": { "description": "The number of times a meal is cooked per week", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "furniture": { "$ref": "#/$defs/FURNITUREENUM", "description": "The types of furniture present in the sampled room" }, "gender_restroom": { "$ref": "#/$defs/GENDERRESTROOMENUM", "description": "The gender type of the restroom" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hall_count": { "description": "The total count of hallways and corridors in the built structure", "type": "integer" }, "handidness": { "$ref": "#/$defs/HANDIDNESSENUM", "description": "The handedness of the individual sampled" }, "heat_cool_type": { "description": "Methods of conditioning or heating a room or building", "items": { "$ref": "#/$defs/HEATCOOLTYPEENUM" }, "type": "array" }, "heat_deliv_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The location of heat delivery within the room" }, "heat_sys_deliv_meth": { "$ref": "#/$defs/HEATSYSDELIVMETHENUM", "description": "The method by which the heat is delivered through the system" }, "heat_system_id": { "description": "The heating system identifier", "type": "integer" }, "height_carper_fiber": { "description": "The average carpet fiber height in the indoor environment", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "indoor_space": { "$ref": "#/$defs/INDOORSPACEENUM", "description": "A distinguishable space within a structure, the purpose for which discrete areas of a building is used" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "inside_lux": { "description": "The recorded value at sampling time (power density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "int_wall_cond": { "$ref": "#/$defs/SHAREDENUM3", "description": "The physical condition of the wall at the time of sampling; photos or video preferred; use drawings to indicate location of damaged areas" }, "last_clean": { "description": "The last time the floor was cleaned (swept, mopped, vacuumed)", "format": "date-time", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_type": { "description": "Application of light to achieve some practical or aesthetic effect. Lighting includes the use of both artificial light sources such as lamps and light fixtures, as well as natural illumination by capturing daylight. Can also include absence of light", "items": { "$ref": "#/$defs/LIGHTTYPEENUM" }, "type": "array" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "max_occup": { "description": "The maximum amount of people allowed in the indoor environment", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mech_struc": { "$ref": "#/$defs/MECHSTRUCENUM", "description": "mechanical structure: a moving structure" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "number_pets": { "description": "The number of pets residing in the sampled space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_plants": { "description": "The number of plant(s) in the sampling space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_resident": { "description": "The number of individuals currently occupying in the sampling location", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "occup_density_samp": { "description": "Average number of occupants at time of sampling per square footage", "type": "number" }, "occup_document": { "$ref": "#/$defs/OCCUPDOCUMENTENUM", "description": "The type of documentation of occupancy" }, 
"occup_samp": { "description": "Number of occupants present at time of sample within the given space", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUS defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "pres_animal_insect": { "description": "The type and number of animals or insects present in the sampling space", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "quad_pos": { 
"$ref": "#/$defs/QUADPOSENUM", "description": "The quadrant position of the sampling room within the building" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_air_humidity": { "description": "Partial vapor and air pressure, density of the vapor and air, or by the actual mass of the vapor and air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "rel_humidity_out": { "description": "The recorded outside relative humidity value at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "rel_samp_loc": { "$ref": "#/$defs/RELSAMPLOCENUM", "description": "The sampling location within the train car" }, "room_air_exch_rate": { "description": "The rate at which outside air replaces indoor air in a given space", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_architec_elem": { "description": "The unique details and component parts that, together, form the architecture of a distinguishable space within a built structure", "type": "string" }, "room_condt": { "$ref": "#/$defs/ROOMCONDTENUM", "description": "The condition of the room at the time of sampling" }, "room_connected": { "$ref": "#/$defs/ROOMCONNECTEDENUM", "description": "List of rooms connected to the sampling room by a doorway" }, "room_count": { "description": "The total count of rooms in the built structure including all room types", "type": "integer" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "room_door_dist": { "description": "Distance between doors (meters) in the hallway between the sampling room and adjacent rooms", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_door_share": { "description": "List of room(s) (room number, room name) sharing a door with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_hallway": { "description": "List of room(s) (room number, room name) located in the same hallway as sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_loc": { "$ref": "#/$defs/ROOMLOCENUM", "description": "The position of the room within the building" }, "room_moist_dam_hist": { "description": "The history of moisture damage or mold in the past 12 months. Number of events of moisture damage or mold observed", "type": "integer" }, "room_net_area": { "description": "The net floor area of sampling room. 
Net area excludes wall thicknesses", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_occup": { "description": "Count of room occupancy at time of sampling", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_samp_pos": { "$ref": "#/$defs/ROOMSAMPPOSENUM", "description": "The horizontal sampling position in the room relative to architectural elements" }, "room_type": { "description": "The main purpose or activity of the sampling room. A room is any distinguishable space within a structure", "type": "string" }, "room_vol": { "description": "Volume of sampling room", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "room_wall_share": { "description": "List of room(s) (room number, room name) sharing a wall with the sampling room", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[1-9][0-9]*$", "type": "string" }, "room_window_count": { "description": "Number of windows in the room", "type": "integer" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_sort_meth": { "description": "Method by which samples are sorted; open face filter collecting total suspended particles, prefilter to remove particles larger than X micrometers in diameter, where common values of X would be 10 and 2.5 full size sorting in a cascade impactor", "items": { "type": "string" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_time_out": { "description": "The recent and long term history of outside sampling", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_weather": { "$ref": "#/$defs/SAMPWEATHERENUM", "description": "The weather on the sampling day" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. 
This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_use": { "$ref": "#/$defs/SEASONUSEENUM", "description": "The seasons the space is occupied" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "shad_dev_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the shading device" }, "shading_device_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the shading device at the time of sampling" }, "shading_device_loc": { "$ref": "#/$defs/SHADINGDEVICELOCENUM", "description": "The location of the shading device in relation to the built structure" }, "shading_device_mat": { "description": "The material the shading device is composed of", "type": "string" }, "shading_device_type": { "$ref": "#/$defs/SHADINGDEVICETYPEENUM", "description": "The type of shading device" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a 
material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "space_typ_state": { "$ref": "#/$defs/SPACETYPSTATEENUM", "description": "Customary or normal state of the space" }, "specific": { "$ref": "#/$defs/SPECIFICENUM", "description": "The building specifications. If design is chosen, indicate phase: conceptual, schematic, design development, construction documents" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "specific_humidity": { "description": "The mass of water vapour in a unit mass of moist air, usually expressed as grams of vapour per kilogram of air, or, in air conditioning, as grains per pound", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "substructure_type": { "description": "The substructure or under building is that largely hidden section of the building which is built off the foundations to the ground floor level", "items": { "$ref": "#/$defs/SUBSTRUCTURETYPEENUM" }, "type": "array" }, "surf_air_cont": { "description": "Contaminant identified on surface", "items": { "$ref": "#/$defs/SURFAIRCONTENUM" }, "type": "array" }, "surf_humidity": { "description": "Surfaces: water activity as a function of air and material moisture", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "surf_moisture": { "description": "Water held on a surface", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "surf_moisture_ph": { "description": "ph measurement of surface", "type": "number" }, "surf_temp": { "description": "Temperature of the surface at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp_out": { "description": "The recorded temperature value at sampling time outside", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "train_line": { "$ref": "#/$defs/TRAINLINEENUM", "description": "The subway line name" }, "train_stat_loc": { "$ref": "#/$defs/TRAINSTATLOCENUM", "description": "The train station collection location" }, "train_stop_loc": { "$ref": "#/$defs/TRAINSTOPLOCENUM", "description": "The train stop collection location" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "typ_occup_density": { "description": "Customary or normal density of occupants", "type": "number" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "vis_media": { "description": "The building visual media", "type": "string" }, "wall_area": { "description": "The total area of the sampled room's walls", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_const_type": { "$ref": "#/$defs/WALLCONSTTYPEENUM", "description": "The building class of the wall defined by the composition of the building elements and fire-resistance rating" }, "wall_finish_mat": { "$ref": "#/$defs/WALLFINISHMATENUM", "description": "The material utilized to finish the outer most layer of the wall" }, "wall_height": { "description": "The average height of the walls in the sampled room", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the wall within the room" }, "wall_surf_treatment": { "$ref": "#/$defs/WALLSURFTREATMENTENUM", "description": "The surface treatment of interior wall" }, "wall_texture": { "$ref": "#/$defs/SHAREDENUM4", "description": "The feel, appearance, or consistency of a wall surface" }, "wall_thermal_mass": { "description": "The ability of the wall to provide inertia against temperature fluctuations. Generally this means concrete or concrete block that is either exposed or covered only with paint", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wall_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on a wall" }, "water_feat_size": { "description": "The size of the water feature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_feat_type": { "$ref": "#/$defs/WATERFEATTYPEENUM", "description": "The type of water feature present within the building being sampled" }, "weekday": { "$ref": "#/$defs/WEEKDAYENUM", "description": "The day of the week when sampling occurred" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "window_cond": { "$ref": "#/$defs/SHAREDENUM2", "description": "The physical condition of the window at the time of sampling" }, "window_cover": { "$ref": "#/$defs/WINDOWCOVERENUM", "description": "The type of window covering" }, "window_horiz_pos": { "$ref": "#/$defs/WINDOWHORIZPOSENUM", "description": "The horizontal position of the window on the wall" }, "window_loc": { "$ref": "#/$defs/SHAREDENUM0", "description": "The relative location of the window within the room" }, "window_mat": { "$ref": "#/$defs/WINDOWMATENUM", "description": "The type of material used to finish a window" }, "window_open_freq": { "description": "The number of times windows are opened per week", "type": "integer" }, "window_size": { "description": "The window's length and width", "type": "string" }, "window_status": { "$ref": "#/$defs/WINDOWSTATUSENUM", "description": "Defines whether the windows were open or closed during environmental testing" }, "window_type": { "$ref": "#/$defs/WINDOWTYPEENUM", "description": "The type of windows" }, "window_vert_pos": { "$ref": "#/$defs/WINDOWVERTPOSENUM", "description": "The vertical position of the window on the wall" }, "window_water_mold": { "$ref": "#/$defs/SHAREDENUM1", "description": "Signs of the presence of mold or mildew on the window" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", 
"env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type", "surf_material", "rel_air_humidity", "abs_air_humidity", "air_temp", "build_occup_type", "carb_dioxide", "ventilation_type", "organism_count", "indoor_space", "indoor_surf", "filter_type", "heat_cool_type", "building_setting", "light_type", "space_typ_state", "typ_occup_density", "occup_samp", "occup_density_samp" ], "title": "MiuvigBuiltEnvironment", "type": "object" }, "MiuvigFoodAnimalAndAnimalFeed": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the FoodAnimalAndAnimalFeed Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. 
An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "animal_am": { "description": "The name(s) (generic or brand) of the antimicrobial(s) given to the food animal within the last 30 days", "type": "string" }, "animal_am_dur": { "description": "The duration of time (days) that the antimicrobial was administered to the food animal", "type": "string" }, "animal_am_freq": { "description": "The frequency per day that the antimicrobial was administered to the food animal", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "animal_am_route": { "description": "The route by which the antimicrobial is administered into the body of the food animal", "type": "string" }, "animal_am_use": { "description": "The prescribed intended use of or the condition treated by the antimicrobial given to the food animal by any route of administration", "type": "string" }, "animal_body_cond": { "$ref": "#/$defs/ANIMALBODYCONDENUM", "description": "Body condition scoring is a production management tool used to evaluate overall health and nutritional needs of a food animal. Because there are different scoring systems, this field is restricted to three categories" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes |Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. 
This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_sex": { "$ref": "#/$defs/ANIMALSEXENUM", "description": "The sex and reproductive status of the food animal" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. 
This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. 
This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that are not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. 
See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). 
Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. 
This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON:03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality, this date is not a reflection of safety unless used on infant formula this date is not a reflection of safety and is typically labeled on a food product as \"best if used by,\" best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_source_age": { "description": "The age of the food source host organim. Depending on the type of host organism, age may be more appropriate to report in days, weeks, or years", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. 
Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. 
This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, 
volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUS defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). 
If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for 
sequencing", "type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type", "coll_site_geo_feat", "food_origin", "food_prod", "food_product_type", "IFSAC_category", "intended_consumer", "samp_purpose" ], "title": "MiuvigFoodAnimalAndAnimalFeed", "type": "object" }, "MiuvigFoodFarmEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the FoodFarmEnvironment Extension", "properties": { "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products. An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "adjacent_environment": { "description": "Description of the environmental system or features that are adjacent to the sampling site. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "air_PM_concen": { "description": "Concentration of substances that remain suspended in the air, and comprise mixtures of organic and inorganic substances (PM10 and PM2.5); can report multiple PM's by entering numeric values preceded by name of PM", "items": { "type": "string" }, "type": "array" }, "air_flow_impede": { "description": "Presence of objects in the area that would influence or impede air flow through the air filter", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "anim_water_method": { "description": "Description of the equipment or method used to distribute water to livestock. This field accepts terms listed under water delivery equipment (http://opendata.inra.fr/EOL/EOL_0001653). 
Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_diet": { "description": "If the isolate is from a food animal, the type of diet eaten by the food animal. Please list the main food staple and the setting, if appropriate. For a list of acceptable animal feed terms or categories, please see http://www.feedipedia.org. Multiple terms may apply and can be separated by pipes |Food product for animal covers foods intended for consumption by domesticated animals. Consult http://purl.obolibrary.org/obo/FOODON_03309997. If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes. If the proper descriptor is not listed please use text to describe the food product type", "items": { "type": "string" }, "type": "array" }, "animal_feed_equip": { "description": "Description of the feeding equipment used for livestock. This field accepts terms listed under feed delivery (http://opendata.inra.fr/EOL/EOL_0001757). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "animal_group_size": { "description": "The number of food animals of the same species that are maintained together as a unit, i.e. a herd or flock", "type": "integer" }, "animal_housing": { "description": "Description of the housing system of the livestock. This field accepts terms listed under terrestrial management housing system (http://opendata.inra.fr/EOL/EOL_0001605)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "animal_intrusion": { "description": "Identification of animals intruding on the sample or sample site including invertebrates (such as pests or pollinators) and vertebrates (such as wildlife or domesticated animals). 
This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. 
In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "crop_yield": { "description": "Amount of crop produced per unit or area of land", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "date_extr_weath": { "description": "Date of unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "format": "date-time", "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extr_weather_event": { "description": "Unusual weather events that may have affected microbial populations. Multiple terms can be separated by pipes, listed in reverse chronological order", "items": { "$ref": "#/$defs/EXTRWEATHEREVENTENUM" }, "type": "array" }, "farm_equip": { "description": "List of equipment used for planting, fertilization, harvesting, irrigation, land levelling, residue management, weeding or transplanting during the growing season. 
This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_equip_san": { "description": "Method used to sanitize growing and harvesting equipment. This can include the type and concentration of sanitizing solution. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "farm_equip_san_freq": { "description": "The number of times farm equipment is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually", "type": "string" }, "farm_equip_shared": { "description": "List of planting, growing or harvesting equipment shared with other farms. This field accepts terms listed under agricultural implement (http://purl.obolibrary.org/obo/AGRO_00000416). Multiple terms can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "farm_water_source": { "$ref": "#/$defs/FARMWATERSOURCEENUM", "description": "Source of water used on the farm for irrigation of crops or watering of livestock" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_admin": { "description": "Type of fertilizer or amendment added to the soil or water for the purpose of improving substrate health and quality for plant growth. This field accepts terms listed under agronomic fertilizer (http://purl.obolibrary.org/obo/AGRO_00002062). 
Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "fertilizer_date": { "description": "Date of administration of soil amendment or fertilizer. Multiple terms may apply and can be separated by pipes, listing in reverse chronological order", "format": "date-time", "type": "string" }, "food_clean_proc": { "$ref": "#/$defs/FOODCLEANPROCENUM", "description": "The process of cleaning food to separate other environmental materials from the food source. Multiple terms can be separated by pipes" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_harvest_proc": { "description": "A harvesting process is a process which takes in some food material from an individual or community of plant or animal organisms in a given context and time, and outputs a precursor or consumable food product. This may include a part of an organism or the whole, and may involve killing the organism", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. 
The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality. This date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. 
This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_medium": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). The name of the medium used to grow the microorganism", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity": { "description": "Amount of water vapour in the air, at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. 
The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": 
"Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUS defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_part_maturity": { "description": "A description of the stage of development of a plant or plant part based on maturity or ripeness. This field accepts terms listed under degree of plant maturity (http://purl.obolibrary.org/obo/FOODON_03530050)", "type": "string" }, "plant_reprod_crop": { "description": "Plant reproductive part used in the field during planting to start the crop", "items": { "$ref": "#/$defs/PLANTREPRODCROPENUM" }, "type": "array" }, "plant_water_method": { "description": "Description of the equipment or method used to distribute water to crops. This field accepts terms listed under irrigation process (http://purl.obolibrary.org/obo/AGRO_00000006). 
Multiple terms can be separated by pipes", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, free-range, industrial, hormone-free, antibiotic free, cage free. Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "rel_location": { "description": "Location of sampled soil to other parts of the farm e.g. 
under crop plant, near irrigation ditch, from the dirt road", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. 
This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. 
This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "season": { "description": "The season when sampling occurred. Any of the four periods into which the year is divided by the equinoxes and solstices. This field accepts terms listed under season (http://purl.obolibrary.org/obo/NCIT_C94729)", "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:[a-zA-Z0-9]+\\]$", "type": "string" }, "season_humidity": { "description": "Average humidity of the region throughout the growing season", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]))$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^((([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*)$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. 
Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_conductivity": { "description": "Conductivity of soil at time of sampling.", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_cover": { "description": "Material covering the sampled soil. This field accepts terms under ENVO:00010483, environmental material", "type": "string" }, "soil_pH": { "description": "The pH of soil at time of sampling.", "type": "number" }, "soil_porosity": { "description": "Porosity of soil or deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "soil_temp": { "description": "Temperature of soil at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_class": { "$ref": "#/$defs/SOILTEXTURECLASSENUM", "description": "One of the 12 soil texture classes used to describe soil texture based on the relative proportion of different grain sizes of mineral particles [sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um)] in a soil" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "solar_irradiance": { "description": "The amount of solar energy that arrives at a specific area of a surface during a specific time interval", "items": { "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. 
Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]))$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. 
Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. 
Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_rate": { "description": "Ventilation rate of the system in the sampled premises", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ventilation_type": { "description": "Ventilation system used in the sampled premises", "items": { "type": "string" }, "type": "array" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "water_frequency": { "description": "Number of water delivery events within a given period of time", "type": "string" }, "water_pH": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid.", "type": "number" }, "water_source_adjac": { "description": "Description of the environmental features that are adjacent to the farm water source. This field accepts terms under ecosystem (http://purl.obolibrary.org/obo/ENVO_01001110) and human construction (http://purl.obolibrary.org/obo/ENVO_00000070). Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "water_source_shared": { "description": "Other users sharing access to the same water source. 
Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "wind_direction": { "description": "Wind direction is the direction from which a wind originates", "type": "string" }, "wind_speed": { "description": "speed of wind measured at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type", "biotic_regm", "chem_administration", "food_product_type", "samp_type", "IFSAC_category" ], "title": "MiuvigFoodFarmEnvironment", "type": "object" }, "MiuvigFoodFoodProductionFacility": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the FoodFoodProductionFacility Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. 
First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products). An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp": { "description": "Temperature of the air at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "area_samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "avg_occup": { "description": "Daily average occupancy of room. Indicate the number of person(s) daily occupying the sampling room", "type": "number" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biocide_used": { "description": "Substance intended for preventing, neutralizing, destroying, repelling, or mitigating the effects of any pest or microorganism; that inhibits the growth, reproduction, and activity of organisms, including fungal cells; decreases the number of fungi or pests present; deters microbial growth and degradation of other ingredients in the formulation. Indicate the biocide used on the location where the sample was taken. 
Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. 
This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_monitoring_zone": { "description": "An environmental monitoring zone is a formal designation as part of an environmental monitoring program, in which areas of a food production facility are categorized, commonly as zones 1-4, based on likelihood or risk of foodborne pathogen contamination. This field accepts terms listed under food production environmental monitoring zone (http://purl.obolibrary.org/obo/ENVO). 
Please add a term to indicate the environmental monitoring zone the sample was taken from", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "facility_type": { "description": "Establishment details about the type of facility where the sample was taken. This is independent of the specific product(s) within the facility", "items": { "$ref": "#/$defs/FACILITYTYPEENUM" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. 
This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_dis_point_city": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. 
This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. http://dx.doi.org/10.9752/MS045.03-2012", "items": { "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that is not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). 
If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_char": { "description": "Descriptors of the food production system such as wild caught, free-range, organic, free-range, industrial, dairy, beef", "items": { "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known by (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. 
This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality, this date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. 
Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "freq_clean": { "$ref": "#/$defs/FREQCLEANENUM", "description": "The number of times the sample location is cleaned. Frequency of cleaning might be on a Daily basis, Weekly, Monthly, Quarterly or Annually" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "hygienic_area": { "description": "The subdivision of areas within a food production facility according to hygienic requirements. This field accepts terms listed under hygienic food production area (http://purl.obolibrary.org/obo/ENVO). Please add a term that most accurately indicates the hygienic area your sample was taken from according to the definitions provided", "type": "string" }, "indoor_surf": { "$ref": "#/$defs/INDOORSURFENUM", "description": "Type of indoor surface" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. 
This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. 
The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material", "items": { "type": "string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUS defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. 
If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "prod_label_claims": { "description": "Labeling claims containing descriptors such as wild caught, free-range, organic, industrial, hormone-free, antibiotic free, cage free. Can include more than one term, separated by \";\"", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "room_dim": { "description": "The length, width and height of sampling room", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. 
This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_floor": { "description": "The floor of the building, where the sampling room is located", "type": "string" }, "samp_loc_condition": { "$ref": "#/$defs/SAMPLOCCONDITIONENUM", "description": "The condition of the sample location at the time of sampling" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. 
Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_room_id": { "description": "Sampling room number. This ID should be consistent with the designations on the building floor plans", "type": "integer" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. 
If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_surf_moisture": { "description": "Degree of water held on a sampled surface. If present, user can state the degree of water held on surface (intermittent moisture, submerged). If no surface moisture is present indicate not present", "items": { "$ref": "#/$defs/SAMPSURFMOISTUREENUM" }, "type": "array" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. 
Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. 
The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "spec_intended_cons": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "ster_meth_samp_room": { "description": "The method used to sterilize the sampling room. This field accepts terms listed under electromagnetic radiation (http://purl.obolibrary.org/obo/ENVO_01001026). If the proper descriptor is not listed, please use text to describe the sampling room sterilization method. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). 
If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "subspecf_gen_lin": { "description": "Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "type": "string" }, "surf_material": { "$ref": "#/$defs/SURFMATERIALENUM", "description": "Surface materials at the point of sampling" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA 
in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type", "coll_site_geo_feat", "samp_source_mat_cat", "samp_type", "samp_stor_media", "samp_stor_device", "food_product_type", "IFSAC_category", "food_product_qual", "food_contact_surf" ], "title": "MiuvigFoodFoodProductionFacility", "type": "object" }, "MiuvigFoodHumanFoods": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the FoodHumanFoods Extension", "properties": { "HACCP_term": { "description": "Hazard Analysis Critical Control Points (HACCP) food safety terms; This field accepts terms listed under HACCP guide food safety term (http://purl.obolibrary.org/obo/FOODON_03530221)", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "IFSAC_category": { "description": "The IFSAC food categorization scheme has five distinct levels to which foods can be assigned, depending upon the type of food. First, foods are assigned to one of four food groups (aquatic animals, land animals, plants, and other). Food groups include increasingly specific food categories; dairy, eggs, meat and poultry, and game are in the land animal food group, and the category meat and poultry is further subdivided into more specific categories of meat (beef, pork, other meat) and poultry (chicken, turkey, other poultry). 
Finally, foods are differentiated by differences in food processing (such as pasteurized fluid dairy products, unpasteurized fluid dairy products, pasteurized solid and semi-solid dairy products, and unpasteurized solid and semi-solid dairy products). An IFSAC food category chart is available from https://www.cdc.gov/foodsafety/ifsac/projects/food-categorization-scheme.html PMID: 28926300", "items": { "type": "string" }, "type": "array" }, "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. 
Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacterial_density": { "description": "Number of bacteria in sample, as defined by bacteria density (http://purl.obolibrary.org/obo/GENEPIO_0000043)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "coll_site_geo_feat": { "description": "Text or terms that describe the geographic feature where the food sample was obtained by the researcher. This field accepts selected terms listed under the following ontologies: anthropogenic geographic feature (http://purl.obolibrary.org/obo/ENVO_00000002), for example agricultural fairground [ENVO:01000986]; garden [ENVO:00000011] or any of its subclasses; market [ENVO:01000987]; water well [ENVO:01000002]; or human construction (http://purl.obolibrary.org/obo/ENVO_00000070)", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. 
For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cons_food_stor_dur": { "description": "The storage duration of the food commodity by the consumer, prior to onset of illness or sample collection. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "cons_food_stor_temp": { "description": "Temperature at which food commodity was stored by the consumer, prior to onset of illness or sample collection", "type": "string" }, "cons_purch_date": { "description": "The date a food product was purchased by consumer", "format": "date-time", "type": "string" }, "cons_qty_purchased": { "description": "The quantity of food purchased by consumer", "pattern": "^[1-9][0-9]* ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "cult_isol_date": { "description": "The datetime marking the end of a process in which a sample yields a positive result for the target microbial analyte(s) in the form of an isolated colony or colonies", "format": "date-time", "type": "string" }, "cult_result": { "$ref": "#/$defs/CULTRESULTENUM", "description": "Any result of a bacterial culture experiment reported as a binary assessment such as positive/negative, active/inactive" }, "cult_result_org": { "description": "Taxonomic information about the 
cultured organism(s)", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "cult_target": { "description": "The target microbial analyte in terms of investigation scope. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "type": "string" }, "type": "array" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "dietary_claim_use": { "description": "These descriptors are used either for foods intended for special dietary use as defined in 21 CFR 105 or for foods that have special characteristics indicated in the name or labeling. This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510023). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the most prominent dietary claim or use", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "enrichment_protocol": { "description": "The microbiological workflow or protocol followed to test for the presence or enumeration of the target microbial analyte(s). Please provide a PubMed or DOI reference for published protocols", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_chem_add": { "description": "Any chemicals that are added to the fermentation process to achieve the desired final product", "items": { "type": "string" }, "type": "array" }, "ferm_chem_add_perc": { "description": "The amount of chemical added to the fermentation process", "items": { "type": "number" }, "type": "array" }, "ferm_headspace_oxy": { "description": "The amount of headspace oxygen in a fermentation vessel", "type": "number" }, "ferm_medium": { "description": "The growth medium used for the fermented food fermentation process, which supplies the required nutrients. 
Usually this includes a carbon and nitrogen source, water, micronutrients and chemical additives", "type": "string" }, "ferm_pH": { "description": "The pH of the fermented food fermentation process", "type": "number" }, "ferm_rel_humidity": { "description": "The relative humidity of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_temp": { "description": "The temperature of the fermented food fermentation process", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ferm_time": { "description": "The time duration of the fermented food fermentation process", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "ferm_vessel": { "description": "The type of vessel used for containment of the fermentation", "type": "string" }, "food_additive": { "description": "A substance or substances added to food to maintain or improve safety and freshness, to improve or maintain nutritional value, or improve taste, texture and appearance. This field accepts terms listed under food additive (http://purl.obolibrary.org/obo/FOODON_03412972). Multiple terms can be separated by one or more pipes, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_allergen_label": { "description": "A label indication that the product contains a recognized allergen. 
This field accepts terms listed under dietary claim or use (http://purl.obolibrary.org/obo/FOODON_03510213)", "items": { "type": "string" }, "type": "array" }, "food_contact_surf": { "description": "The specific container or coating materials in direct contact with the food. Multiple values can be assigned. This field accepts terms listed under food contact surface (http://purl.obolibrary.org/obo/FOODON_03500010)", "items": { "type": "string" }, "type": "array" }, "food_contain_wrap": { "description": "Type of container or wrapping defined by the main container material, the container form, and the material of the liner lids or ends. Also type of container or wrapping by form; prefer description by material first, then by form. This field accepts terms listed under food container or wrapping (http://purl.obolibrary.org/obo/FOODON_03490100)", "type": "string" }, "food_cooking_proc": { "description": "The transformation of raw food by the application of heat. This field accepts terms listed under food cooking (http://purl.obolibrary.org/obo/FOODON_03450002)", "items": { "type": "string" }, "type": "array" }, "food_dis_point": { "description": "A reference to a place on the Earth, by its name or by its geographical location that refers to a distribution point along the food chain. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448). Reference: Adam Diamond, James Barham. Moving Food Along the Value Chain: Innovations in Regional Food Distribution. U.S. Dept. of Agriculture, Agricultural Marketing Service. Washington, DC. March 2012. 
http://dx.doi.org/10.9752/MS045.03-2012", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type": "array" }, "food_ingredient": { "description": "In this field, please list individual ingredients for multi-component food [FOODON:00002501] and simple foods that are not captured in food_type. Please use terms that are present in FoodOn. Multiple terms can be separated by one or more pipes |, but please consider limiting this list to the top 5 ingredients listed in order as on the food label. See also, https://www.fda.gov/food/food-ingredients-packaging/overview-food-ingredients-additives-colors", "items": { "type": "string" }, "type": "array" }, "food_name_status": { "description": "A datum indicating that use of a food product name is regulated in some legal jurisdiction. This field accepts terms listed under food product name legal status (http://purl.obolibrary.org/obo/FOODON_03530087)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_origin": { "description": "A reference to a place on the Earth, by its name or by its geographical location that describes the origin of the food commodity, either in terms of its cultivation or production. This field accepts terms listed under geographic location (http://purl.obolibrary.org/obo/GAZ_00000448)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_capacity": { "description": "The maximum number of product units within a package", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "food_pack_integrity": { "description": "A term label and term id to describe the state of the packing material and text to explain the exact condition. 
This field accepts terms listed under food packing medium integrity (http://purl.obolibrary.org/obo/FOODON_03530218)", "items": { "type": "string" }, "type": "array" }, "food_pack_medium": { "description": "The medium in which the food is packed for preservation and handling or the medium surrounding homemade foods, e.g., peaches cooked in sugar syrup. The packing medium may provide a controlled environment for the food. It may also serve to improve palatability and consumer appeal. This includes edible packing media (e.g. fruit juice), gas other than air (e.g. carbon dioxide), vacuum packed, or packed with aerosol propellant. This field accepts terms under food packing medium (http://purl.obolibrary.org/obo/FOODON_03480020). Multiple terms may apply and can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "food_preserv_proc": { "description": "The methods contributing to the prevention or retardation of microbial, enzymatic or oxidative spoilage and thus to the extension of shelf life. This field accepts terms listed under food preservation process (http://purl.obolibrary.org/obo/FOODON_03470107)", "items": { "type": "string" }, "type": "array" }, "food_prior_contact": { "description": "The material the food contacted (e.g., was processed in) prior to packaging. This field accepts terms listed under material of contact prior to food packaging (http://purl.obolibrary.org/obo/FOODON_03530077). If the proper descriptor is not listed please use text to describe the material of contact prior to food packaging", "items": { "type": "string" }, "type": "array" }, "food_prod": { "description": "Descriptors of the food production system or of the agricultural environment and growing conditions related to the farm production system, such as wild caught, organic, free-range, industrial, dairy, beef, domestic or cultivated food production. This field accepts terms listed under food production (http://purl.obolibrary.org/obo/FOODON_03530206). 
Multiple terms may apply and can be separated by pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_prod_synonym": { "description": "Other names by which the food product is known (e.g., regional or non-English names)", "items": { "type": "string" }, "type": "array" }, "food_product_qual": { "description": "Descriptors for describing food visually or via other senses, which is useful for tasks like food inspection where little prior knowledge of how the food came to be is available. Some terms like \"food (frozen)\" are both a quality descriptor and the output of a process. This field accepts terms listed under food product by quality (http://purl.obolibrary.org/obo/FOODON_00002454)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "food_product_type": { "description": "A food product type is a class of food products that is differentiated by its food composition (e.g., single- or multi-ingredient), processing and/or consumption characteristics. This does not include brand name products but it may include generic food dish categories. This field accepts terms under food product type (http://purl.obolibrary.org/obo/FOODON_03400361). For terms related to food product for an animal, consult food product for animal (http://purl.obolibrary.org/obo/FOODON_03309997). If the proper descriptor is not listed please use text to describe the food type. 
Multiple terms can be separated by one or more pipes", "type": "string" }, "food_quality_date": { "description": "The date recommended for the use of the product while at peak quality; this date is not a reflection of safety unless used on infant formula, and is typically labeled on a food product as \"best if used by,\" \"best by,\" \"use by,\" or \"freeze by.\"", "type": "string" }, "food_source": { "description": "Type of plant or animal from which the food product or its major ingredient is derived or a chemical food source [FDA CFSAN 1995]", "type": "string" }, "food_trace_list": { "$ref": "#/$defs/FOODTRACELISTENUM", "description": "The FDA is proposing to establish additional traceability recordkeeping requirements (beyond what is already required in existing regulations) for persons who manufacture, process, pack, or hold foods the Agency has designated for inclusion on the Food Traceability List. The Food Traceability List (FTL) identifies the foods for which the additional traceability records described in the proposed rule would be required. The term Food Traceability List (FTL) refers not only to the foods specifically listed (https://www.fda.gov/media/142303/download), but also to any foods that contain listed foods as ingredients" }, "food_trav_mode": { "description": "A descriptor for the method of movement of food commodity along the food distribution system. This field accepts terms listed under travel mode (http://purl.obolibrary.org/obo/GENEPIO_0001064). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "type": "array" }, "food_trav_vehic": { "description": "A descriptor for the mobile machine which is used to transport food commodities along the food distribution system. 
This field accepts terms listed under vehicle (http://purl.obolibrary.org/obo/ENVO_01000604). If the proper descriptor is not listed please use text to describe the mode of travel. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "food_treat_proc": { "description": "Used to specifically characterize a food product based on the treatment or processes applied to the product or any indexed ingredient. The processes include adding, substituting or removing components or modifying the food or component, e.g., through fermentation. Multiple values can be assigned. This field accepts terms listed under food treatment process (http://purl.obolibrary.org/obo/FOODON_03460111)", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "intended_consumer": { "description": "Food consumer type, human or animal, for which the food product is produced and marketed. This field accepts terms listed under food consumer group (http://purl.obolibrary.org/obo/FOODON_03510136) or NCBI taxid", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "library_prep_kit": { "description": "Packaged kits (containing adapters, indexes, enzymes, buffers etc.), tailored for specific sequencing workflows, which allow the simplified preparation of sequencing-ready libraries for small genomes, amplicons, and plasmids", "type": "string" }, "lot_number": { "description": "A distinctive alpha-numeric identification code assigned by the manufacturer or distributor to a specific quantity of manufactured material or product within a batch. Synonym: Batch Number. The submitter should provide lot number of the item followed by the item name for which the lot number was provided", "items": { "type": "string" }, "type": "array" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "microb_cult_med": { "description": "A culture medium used to select for, grow, and maintain prokaryotic microorganisms. Can be in either liquid (broth) or solidified (e.g. with agar) forms. 
This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the culture medium", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "microb_start": { "description": "Any type of microorganisms used in food production. This field accepts terms listed under live organisms for food production (http://purl.obolibrary.org/obo/FOODON_0344453)", "type": "string" }, "microb_start_count": { "description": "Total cell count of starter culture per gram, volume or area of sample and the method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example : total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "microb_start_inoc": { "description": "The amount of starter culture used to inoculate a new batch", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "microb_start_prep": { "description": "Information about the protocol or method used to prepare the starter inoculum", "type": "string" }, "microb_start_source": { "description": "The source from which the microbial starter culture was sourced. If commercially supplied, list supplier", "type": "string" }, "microb_start_taxID": { "description": "Please include Genus species and strain ID, if known of microorganisms used in food production. For complex communities, pipes can be used to separate two or more microbes", "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext_kit": { "description": "The name of the extraction kit used to recover the nucleic acid fraction of an input material", "items": { "type": "string" }, "type": "array" }, "num_samp_collect": { "description": "The number of samples collected during the current sampling event", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUS defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "part_plant_animal": { "description": "The anatomical part of the organism being involved in food production or consumption; e.g., a carrot is the root of the plant (root vegetable). This field accepts terms listed under part of plant or animal (http://purl.obolibrary.org/obo/FOODON_03420116)", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "repository_name": { "description": "The name of the institution where the sample or DNA extract is held or \"sample not available\" if the sample was used in its entirety for analysis or otherwise not retained", "items": { "type": "string" }, "type": "array" }, "samp_collect_device": { "description": "The device used to 
collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_pooling": { "description": "Physical combination of several instances of like material, e.g. RNA extracted from samples or dishes of cell cultures into one big aliquot of cells. 
Please provide a short description of the samples that were pooled", "items": { "type": "string" }, "type": "array" }, "samp_purpose": { "description": "The reason that the sample was collected", "type": "string" }, "samp_rep_biol": { "description": "Measurements of biologically distinct samples that show biological variation", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_rep_tech": { "description": "Repeated measurements of the same sample that show independent measures of the noise associated with the equipment and the protocols", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_source_mat_cat": { "description": "This is the scientific role or category that the subject organism or material has with respect to an investigation. This field accepts terms listed under specimen source material category (http://purl.obolibrary.org/obo/GENEPIO_0001237 or http://purl.obolibrary.org/obo/OBI_0100051)", "type": "string" }, "samp_stor_device": { "description": "The container used to store the sample. This field accepts terms listed under container (http://purl.obolibrary.org/obo/NCIT_C43186). If the proper descriptor is not listed please use text to describe the storage device", "type": "string" }, "samp_stor_media": { "description": "The liquid that is added to the sample collection device prior to sampling. If the sample is pre-hydrated, indicate the liquid media the sample is pre-hydrated with for storage purposes. 
This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the sample storage media", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cont": { "$ref": "#/$defs/SAMPTRANSPORTCONTENUM", "description": "Container in which the sample was stored during transport. Indicate the location name" }, "samp_transport_dur": { "description": "The duration of time from when the sample was collected until processed. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_transport_temp": { "description": "Temperature at which sample was transported, e.g. -20 or 4 degree Celsius", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sequencing_kit": { "description": "Pre-filled, ready-to-use reagent cartridges. Used to produce improved chemistry, cluster density and read length as well as improve quality (Q) scores. Reagent components are encoded to interact with the sequencing system to validate compatibility with user-defined applications. Indicate name of the sequencing kit", "type": "string" }, "sequencing_location": { "description": "The location the sequencing run was performed. 
Indicate the name of the lab or core facility where samples were sequenced", "type": "string" }, "serovar_or_serotype": { "description": "A characterization of a cell or microorganism based on the antigenic properties of the molecules on its surface. Indicate the name of a serovar or serotype of interest. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "spikein_AMR": { "description": "Qualitative description of a microbial response to antimicrobial agents. Bacteria may be susceptible or resistant to a broad range of antibiotic drugs or drug classes, with several intermediate states or phases. This field accepts terms under antimicrobial phenotype (http://purl.obolibrary.org/obo/ARO_3004299)", "items": { "type": "string" }, "type": "array" }, "spikein_antibiotic": { "description": "Antimicrobials used in research study to assess effects of exposure on microbiome of a specific site. Please list antimicrobial, common name and/or class and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided (example: total prokaryotes; 3.5e7 cells per ml; qPCR)", "type": "string" }, "spikein_growth_med": { "description": "A liquid or gel containing nutrients, salts, and other factors formulated to support the growth of microorganisms, cells, or plants (National Cancer Institute Thesaurus). 
A growth medium is a culture medium which has the disposition to encourage growth of particular bacteria to the exclusion of others in the same growth environment. In this case, list the culture medium used to propagate the spike-in bacteria during preparation of spike-in inoculum. This field accepts terms listed under microbiological culture medium (http://purl.obolibrary.org/obo/MICRO_0000067). If the proper descriptor is not listed please use text to describe the spike in growth media", "items": { "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "type": "array" }, "spikein_metal": { "description": "Heavy metals used in research study to assess effects of exposure on microbiome of a specific site. Please list heavy metals and concentration used for spike-in", "items": { "type": "string" }, "type": "array" }, "spikein_org": { "description": "Taxonomic information about the spike-in organism(s). This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "pattern": "^(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])|[1-9][0-9]*$", "type": "string" }, "type": "array" }, "spikein_serovar": { "description": "Taxonomic information about the spike-in organism(s) at the serovar or serotype level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "spikein_strain": { "description": "Taxonomic information about the spike-in organism(s) at the strain level. This field accepts terms under organism (http://purl.obolibrary.org/obo/NCIT_C14250). 
This field also accepts identification numbers from NCBI under https://www.ncbi.nlm.nih.gov/taxonomy. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_design": { "description": "A plan specification comprised of protocols (which may specify how and what kinds of data will be gathered) that are executed as part of an investigation and is realized during a study design execution. This field accepts terms under study design (http://purl.obolibrary.org/obo/OBI_0500000). If the proper descriptor is not listed please use text to describe the study design. Multiple terms can be separated by pipes", "items": { "type": "string" }, "type": "array" }, "study_inc_dur": { "description": "Sample incubation duration if unpublished or unvalidated method is used. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "study_inc_temp": { "description": "Sample incubation temperature if unpublished or unvalidated method is used", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_timecourse": { "description": "For time-course research studies involving samples of the food commodity, indicate the total duration of the time-course study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "study_tmnt": { "description": "A process in which the act is intended to modify or alter some other material entity. From the study design, each treatment is comprised of one level of one or multiple factors. This field accepts terms listed under treatment (http://purl.obolibrary.org/obo/MCO_0000866). 
If the proper descriptor is not listed please use text to describe the study treatment. Multiple terms can be separated by one or more pipes", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "timepoint": { "description": "Time point at which a sample or observation is made or taken from a biomaterial as measured from some reference point. Indicate the timepoint written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, 
"wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type", "coll_site_geo_feat", "food_product_type", "IFSAC_category" ], "title": "MiuvigFoodHumanFoods", "type": "object" }, "MiuvigHostAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the HostAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. 
A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "blood_press_diast": { "description": "Resting diastolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "blood_press_syst": { "description": "Resting systolic blood pressure, measured as mm mercury", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. 
Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. 
specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. 
Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUS defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. 
inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample.
May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by
http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type" ], "title": "MiuvigHostAssociated", "type": "object" }, "MiuvigHumanAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the HumanAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. 
It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "amniotic_fluid_color": { "description": "Specification of the color of the amniotic fluid sample", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. 
High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "blood_blood_disord": { "description": "History of blood disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, hematopoietic system disease (https://disease-ontology.org/?id=DOID:74)", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instant (single point in time) or interval. In case no exact time is available, the date/time can be right truncated, i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "diet_last_six_month": { "description": "Specification of major diet changes in the last six months, if yes the change should be specified", "type": "string" }, "drug_usage": { "description": "Any drug used by subject and the frequency of usage; can include multiple drugs used", "items": { "type": "string" }, "type": "array" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field.
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "foetal_health_stat": { "description": "Specification of foetal health status, should also include abortion", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gestation_state": { "description": "Specification of the gestation state", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. 
Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung, etc.). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_hiv_stat": { "description": "HIV status of subject, if yes HAART initiation status should also be indicated as [YES or NO]", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "kidney_disord": { "description": "History of kidney disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, kidney disease (https://disease-ontology.org/?id=DOID:557)", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "maternal_health_stat": { 
"description": "Specification of the maternal health status", "type": "string" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_throat_disord": { "description": "History of nose-throat disorders; can include multiple disorders, The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850), upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": 
"Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUS defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pet_farm_animal": { "description": "Specification of presence of pets or farm animals in the environment of subject, if yes the animals should be specified; can include multiple animals present", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "pulmonary_disord": { "description": "History of pulmonary disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, lung disease (https://disease-ontology.org/?id=DOID:850)", "items": { "type": "string" }, "type": "array" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "smoker": { "description": "Specification of smoking status", "type": "boolean" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "study_complt_stat": { "description": "Specification of study completion status, if no the reason should be specified", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "travel_out_six_month": { "description": "Specification of the countries travelled in the last six months; can include multiple travels", "items": { "type": "string" }, "type": "array" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "twin_sibling": { "description": "Specification of twin sibling presence", "type": "boolean" }, "urine_collect_meth": { "$ref": "#/$defs/URINECOLLECTMETHENUM", "description": "Specification of urine collection method" }, "urogenit_tract_disor": { "description": "History of urogenital tract disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "weight_loss_3_month": { "description": "Specification of weight loss in the last three months, if yes should be further specified to include amount of weight loss", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type" ], "title": "MiuvigHumanAssociated", "type": "object" }, "MiuvigHumanGut": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the HumanGut Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. 
It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gastrointest_disord": { "description": "History of gastrointestinal tract disorders; can include multiple disorders. History of blood disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, gastrointestinal system disease (https://disease-ontology.org/?id=DOID:77)", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "liver_disord": { "description": "History of liver disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, liver disease (https://disease-ontology.org/?id=DOID:409)", "items": { "type": "string" }, "type": "array" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. 
Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUS defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. 
It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. 
The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "special_diet": { "description": "Specification of special diet; can include multiple special diets", "items": { "type": "string" }, "type": "array" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type" ], "title": "MiuvigHumanGut", "type": "object" }, "MiuvigHumanOral": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the HumanOral Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. 
It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. 
could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nose_mouth_teeth_throat_disord": { "description": "History of nose/mouth/teeth/throat disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, nose disease (https://disease-ontology.org/?id=DOID:2825), mouth disease (https://disease-ontology.org/?id=DOID:403), tooth disease (https://disease-ontology.org/?id=DOID:1091), or upper respiratory tract disease (https://disease-ontology.org/?id=DOID:974)", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. 
Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUS defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. 
It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. 
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. 
The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_last_toothbrush": { "description": "Specification of the time since last toothbrushing", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type" ], "title": "MiuvigHumanOral", "type": "object" }, "MiuvigHumanSkin": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the HumanSkin Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "dermatology_disord": { "description": "History of dermatology disorders; can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, skin disease (https://disease-ontology.org/?id=DOID:37)", "items": { "type": "string" }, "type": "array" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "dominant_hand": { "$ref": "#/$defs/DOMINANTHANDENUM", "description": "Dominant hand of the subject" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field.
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). 
Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host.
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "medic_hist_perform": { "description": "Whether full medical history was collected", "type": "boolean" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. 
Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUS defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. 
It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction.
Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. 
The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "time_since_last_wash": { "description": "Specification of the time since last wash", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type" ], "title": "MiuvigHumanSkin", "type": "object" }, "MiuvigHumanVaginal": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the HumanVaginal Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "birth_control": { "description": "Specification of birth control medication used", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "douche": { "description": "Date of most recent douche", "format": "date-time", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethnicity": { "description": "A category of people who identify with each other, usually on the basis of presumed similarities such as a common language, ancestry, history, society, culture, nation or social treatment within their residing area. https://en.wikipedia.org/wiki/List_of_contemporary_ethnic_groups", "items": { "type": "string" }, "type": "array" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gynecologic_disord": { "description": "History of gynecological disorders; can include multiple disorders. The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, female reproductive system disease (https://disease-ontology.org/?id=DOID:229)", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_mass_index": { "description": "Body mass index, calculated as weight/(height)squared", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. 
Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_temp": { "description": "Core body temperature of the host when sample was collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_diet": { "description": "Type of diet depending on the host, for animals omnivore, herbivore etc., for humans high-fat, Mediterranean etc.; can include multiple diet types", "items": { "type": "string" }, "type": "array" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_last_meal": { "description": "Content of last meal and time since feeding; can include multiple values", "items": { "type": "string" }, "type": "array" }, "host_occupation": { "description": "Most frequent job performed by subject", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_pulse": { "description": "Resting pulse, measured as beats per minute", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_sex": { "description": "Gender or physical sex of the host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. 
For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hrt": { "description": "Whether subject had hormone replacement therapy, and if yes start date", "format": "date-time", "type": "string" }, "hysterectomy": { "description": "Specification of whether hysterectomy was performed", "type": "boolean" }, "ihmc_medication_code": { "description": "Can include multiple medication codes", "items": { "type": "integer" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "medic_hist_perform": { "description": "Whether full medical history was 
collected", "type": "boolean" }, "menarche": { "description": "Date of most recent menstruation", "format": "date-time", "type": "string" }, "menopause": { "description": "Date of onset of menopause", "format": "date-time", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. 
The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUs defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "pregnancy": { "description": "Date due of pregnancy", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sexual_act": { "description": "Current sexual partner and frequency of sex", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "urogenit_disord": { "description": "History of urogenital disorders, can include multiple disorders. 
The terms should be chosen from the DO (Human Disease Ontology) at http://www.disease-ontology.org, reproductive system disease (https://disease-ontology.org/?id=DOID:15) or urinary system disease (https://disease-ontology.org/?id=DOID:18)", "items": { "type": "string" }, "type": "array" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type" ], "title": "MiuvigHumanVaginal", "type": "object" }, "MiuvigHydrocarbonResourcesCores": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the HydrocarbonResourcesCores Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. 
Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. 
checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. Mic)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428.
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling conditions should also be provided. (Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. 
Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUS defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "owc_tvdss": { "description": "Depth of the original oil water contact (OWC) zone (average) (m TVDSS)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "permeability": { "description": "Measure of the ability of a hydrocarbon resource to allow fluids to pass through it. (Additional information: https://en.wikipedia.org/wiki/Permeability_(earth_sciences))", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "reservoir": { "description": "Name of the reservoir (e.g.
Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_md": { "description": "In a non-deviated well, measured depth is equal to the true vertical depth, TVD (TVD=TVDSS plus the reference or datum it refers to). In deviated wells, the MD is the length of trajectory of the borehole measured from the same reference or datum. Common datums used are ground level (GL), drilling rig floor (DF), rotary table (RT), kelly bushing (KB) and mean sea level (MSL). If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_tvdss": { "description": "Depth of the sample i.e. the vertical distance between the sea level and the sampled position in the subsurface. Depth can be reported as an interval for subsurface samples e.g. 1325.75-1362.25 m", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained.
For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material. This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. 
ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "sr_dep_env": { "$ref": "#/$defs/SRDEPENVENUM", "description": "Source rock depositional environment (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of source rock (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_kerog_type": { "$ref": "#/$defs/SRKEROGTYPEENUM", "description": "Origin of kerogen. Type I: Algal (aquatic), Type II: planktonic and soft plant material (aquatic or terrestrial), Type III: terrestrial woody/ fibrous plant material (terrestrial), Type IV: oxidized recycled woody debris (terrestrial) (additional information: https://en.wikipedia.org/wiki/Kerogen). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sr_lithology": { "$ref": "#/$defs/SRLITHOLOGYENUM", "description": "Lithology of source rock (https://en.wikipedia.org/wiki/Source_rock). If \"other\" is specified, please propose entry in \"additional info\" field" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 
1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. 
This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "source_uvig", "temp", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type", "hcr", "hc_produced", "basin", "hcr_temp", "sulfate_fw", "vfa_fw", "samp_type", "api" ], "title": "MiuvigHydrocarbonResourcesCores", "type": "object" }, "MiuvigHydrocarbonResourcesFluidsSwabs": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the HydrocarbonResourcesFluidsSwabs Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "add_recov_method": { "description": "Additional (i.e. Secondary, tertiary, etc.) recovery methods deployed for increase of hydrocarbon recovery from resource and start date for each one of them. If \"other\" is specified, please propose entry in \"additional info\" field", "type": "string" }, "additional_info": { "description": "Information that doesn't fit anywhere else. 
Can also be used to propose new entries for fields with controlled vocabulary", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "api": { "description": "API gravity is a measure of how heavy or light a petroleum liquid is compared to water (source: https://en.wikipedia.org/wiki/API_gravity) (e.g. 31.1 API)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aromatics_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "asphaltenes_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling >= 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totaling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "basin": { "description": "Name of the basin (e.g. Campos)", "type": "string" }, "benzene": { "description": "Concentration of benzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biocide": { "description": "List of biocides (commercial name of product and supplier) and date of administration", "type": "string" }, "biocide_admin_method": { "description": "Method of biocide administration (dose, frequency, duration, time elapsed between last biociding and sampling) (e.g. 150 mg/l; weekly; 4 hr; 3 days)", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_treat_method": { "description": "Method of chemical administration(dose, frequency, duration, time elapsed between administration and sampling) (e.g. 50 mg/l; twice a week; 1 hr; 0 days)", "type": "string" }, "chem_treatment": { "description": "List of chemical compounds administered upstream the sampling location where sampling occurred (e.g. Glycols, H2S scavenger, corrosion and scale inhibitors, demulsifiers, and other production chemicals etc.). The commercial name of the product and name of the supplier should be provided. 
The date of administration should also be included", "type": "string" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depos_env": { "$ref": "#/$defs/DEPOSENVENUM", "description": "Main depositional environment (https://en.wikipedia.org/wiki/Depositional_environment). 
If \"other\" is specified, please propose entry in \"additional info\" field" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_iron": { "description": "Concentration of dissolved iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen_fluid": { "description": "Concentration of dissolved oxygen in the oil field produced fluids as it contributes to oxygen-corrosion and microbial activity (e.g. MIC)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field.
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "ethylbenzene": { "description": "Concentration of ethylbenzene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "field": { "description": "Name of the hydrocarbon field (e.g. Albacora)", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hc_produced": { "$ref": "#/$defs/HCPRODUCEDENUM", "description": "Main hydrocarbon type produced from resource (i.e. Oil, gas, condensate, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr": { "$ref": "#/$defs/HCRENUM", "description": "Main Hydrocarbon Resource type. The term \"Hydrocarbon Resource\" HCR defined as a natural environmental feature containing large amounts of hydrocarbons at high concentrations potentially suitable for commercial exploitation. This term should not be confused with the Hydrocarbon Occurrence term which also includes hydrocarbon-rich environments with currently limited commercial interest such as seeps, outcrops, gas hydrates etc. If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_fw_salinity": { "description": "Original formation water salinity (prior to secondary recovery e.g. 
Waterflooding) expressed as TDS", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_geol_age": { "$ref": "#/$defs/SHAREDENUM5", "description": "Geological age of hydrocarbon resource (Additional info: https://en.wikipedia.org/wiki/Period_(geology)). If \"other\" is specified, please propose entry in \"additional info\" field" }, "hcr_pressure": { "description": "Original pressure of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "hcr_temp": { "description": "Original temperature of the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "iw_bt_date_well": { "description": "Injection water breakthrough date per well following a secondary and/or tertiary recovery", "format": "date-time", "type": "string" }, "iwf": { "description": "Proportion of the produced fluids derived from injected water at the time of 
sampling. (e.g. 87%)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "number" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "lithology": { "$ref": "#/$defs/LITHOLOGYENUM", "description": "Hydrocarbon resource main lithology (Additional information: http://petrowiki.org/Lithology_and_rock_type_determination). If \"other\" is specified, please propose entry in \"additional info\" field" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_count_qpcr_info": { "description": "If qpcr was used for the cell count, the target gene name, the primer sequence and the cycling 
conditions should also be provided. (Example: 16S rrna; FWD:ACGTAGCTATGACGT REV:GTGCTAGTCGAGTAC; initial denaturation:90C_5min; denaturation:90C_2min; annealing:52C_30 sec; elongation:72C_30 sec; 90 C for 1 min; final elongation:72C_5min; 30 cycles)", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUs defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e.
sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pour_point": { "description": "Temperature at which a liquid becomes semi solid and loses its flow characteristics. In crude oil a high pour point is generally associated with a high paraffin content, typically found in crude deriving from a larger proportion of plant material. (source: https://en.wikipedia.org/wiki/pour_point)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_rate": { "description": "Oil and/or gas production rates per well (e.g. 524 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "prod_start_date": { "description": "Date of field's first production", "format": "date-time", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "reservoir": { "description": "Name of the reservoir (e.g. Carapebus)", "type": "string" }, "resins_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. 
While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_point": { "$ref": "#/$defs/SAMPCOLLECTPOINTENUM", "description": "Sampling point on the asset where the sample was collected (e.g. Wellhead, storage tank, separator, etc). If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_loc_corr_rate": { "description": "Metal corrosion rate is the speed of metal deterioration due to environmental conditions. As environmental conditions change corrosion rates change accordingly. Therefore, long term corrosion rates are generally more informative than short term rates and for that reason they are preferred during reporting.
In the case of suspected MIC, corrosion rate measurements at the time of sampling might provide insights into the involvement of certain microbial community members in MIC as well as potential microbial interplays", "pattern": "^[-+]?[0-9]*\\.?[0-9]+ *- *[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_preserv": { "description": "Preservative added to the sample (e.g. Rnalater, alcohol, formaldehyde, etc.). Where appropriate include volume added (e.g. Rnalater; 2 ml)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored.
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_subtype": { "$ref": "#/$defs/SAMPSUBTYPEENUM", "description": "Name of sample sub-type. For example if \"sample type\" is \"Produced Water\" then subtype could be \"Oil Phase\" or \"Water Phase\". If \"other\" is specified, please propose entry in \"additional info\" field" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_transport_cond": { "description": "Sample transport duration (in days or hrs) and temperature the sample was exposed to (e.g. 5.5 days; 20 C)", "type": "string" }, "samp_type": { "description": "The type of material from which the sample was obtained. For the Hydrocarbon package, samples include types like core, rock trimmings, drill cuttings, piping section, coupon, pigging debris, solid deposit, produced fluid, produced water, injected water, swabs, etc. For the Food Package, samples are usually categorized as food, body products or tissues, or environmental material.
This field accepts terms listed under environmental specimen (http://purl.obolibrary.org/obo/GENEPIO_0001246)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_well_name": { "description": "Name of the well (e.g. BXA1123) where sample was taken", "type": "string" }, "saturates_pc": { "description": "Saturate, Aromatic, Resin and Asphaltene (SARA) is an analysis method that divides crude oil components according to their polarizability and polarity. There are three main methods to obtain SARA results. The most popular one is known as the Iatroscan TLC-FID and is referred to as IP-143 (source: https://en.wikipedia.org/wiki/Saturate,_aromatic,_resin_and_asphaltene)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);[-+]?[0-9]*\\.?[0-9]+ ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. 
ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate_fw": { "description": "Original sulfate concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tan": { "description": "Total Acid Number (TAN) is a measurement of acidity that is determined by the amount of potassium hydroxide in milligrams that is needed to neutralize the acids in one gram of oil. It is an important quality measurement of crude oil. (source: https://en.wikipedia.org/wiki/Total_acid_number)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "toluene": { "description": "Concentration of toluene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_iron": { "description": "Concentration of total iron in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_sulfur": { "description": "Concentration of total sulfur in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "tvdss_of_hcr_press": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original pressure was measured (e.g. 1578 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tvdss_of_hcr_temp": { "description": "True vertical depth subsea (TVDSS) of the hydrocarbon resource where the original temperature was measured (e.g. 1345 m)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa": { "description": "Concentration of Volatile Fatty Acids in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vfa_fw": { "description": "Original volatile fatty acid concentration in the hydrocarbon resource", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "viscosity": { "description": "A measure of oil's resistance to gradual deformation by shear stress or tensile stress (e.g. 3.5 cp; 100 C)", "type": "string" }, "water_cut": { "description": "Current amount of water (%) in a produced fluid stream; or the average of the combined streams", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_prod_rate": { "description": "Water production rates per well (e.g. 987 m3 / day)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" }, "win": { "description": "A unique identifier of a well or wellbore. This is part of the Global Framework for Well Identification initiative which is compiled by the Professional Petroleum Data Management Association (PPDM) in an effort to improve well identification systems. (Supporting information: https://ppdm.org/ and http://dl.ppdm.org/dl/690)", "type": "string" }, "xylene": { "description": "Concentration of xylene in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "source_uvig", "temp", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type", "hcr", "hc_produced", "basin", "water_cut", "iwf", "add_recov_method", "samp_type", "samp_collect_point", "sulfate", "sulfide", "nitrate", "api" ], "title": "MiuvigHydrocarbonResourcesFluidsSwabs", "type": "object" }, "MiuvigMicrobialMatBiofilm": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the MicrobialMatBiofilm Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. 
In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. 
For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g.
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": 
"^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUs defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g.
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity.
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type" ], "title": "MiuvigMicrobialMatBiofilm", "type": "object" }, "MiuvigMiscellaneousNaturalOrArtificialEnvironment": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the MiscellaneousNaturalOrArtificialEnvironment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008. Except for 2008-01 and 2008, all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome.
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, e.g. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. 
Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUs defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). 
This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degrees Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type" ], "title": "MiuvigMiscellaneousNaturalOrArtificialEnvironment", "type": "object" }, "MiuvigPlantAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the PlantAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "air_temp_regm": { "description": "Information about treatment involving an exposure to varying temperatures; should include the temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include different temperature regimens", "items": { "type": "string" }, "type": "array" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. 
A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "antibiotic_regm": { "description": "Information about treatment involving antibiotic administration; should include the name of antibiotic, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple antibiotic regimens", "items": { "type": "string" }, "type": "array" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. 
High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_mutagen": { "description": "Treatment involving use of mutagens; should include the name of mutagen, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mutagen regimens", "items": { "type": "string" }, "type": "array" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. 
High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "cult_root_med": { "description": "Name or reference for the hydroponic or in vitro culture rooting medium; can be the name of a commonly used medium or reference to a specific medium, e.g. Murashige and Skoog medium. If the medium has not been formally published, use the rooting medium descriptors", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. 
This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_regm": { "description": "Information about treatment involving the use of fertilizers; should include the name of fertilizer, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fertilizer regimens", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. 
specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravity": { "description": "Information about treatment involving use of gravity factor to study various types of responses in presence, absence or modified levels of gravity; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple treatments", "items": { "type": "string" }, "type": "array" }, "growth_facil": { "description": "Type of facility where the sampled plant was grown; controlled vocabulary: growth chamber, open top chamber, glasshouse, experimental garden, field. 
Alternatively use Crop Ontology (CO) terms, see http://www.cropontology.org/ontology/CO_715/Crop%20Research", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_hormone_regm": { "description": "Information about treatment involving use of growth hormones; should include the name of growth hormone, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple growth hormone regimens", "items": { "type": "string" }, "type": "array" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_wet_mass": { "description": "Measurement of wet mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity_regm": { "description": "Information about treatment involving an exposure to varying degree of humidity; should include amount of humidity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_regm": { "description": "Information about treatment(s) involving exposure to light, including both light intensity and quality", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, 
"type": "array" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "mineral_nutr_regm": { "description": "Information about treatment involving the use of mineral supplements; should include the name of mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource 
or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUs defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of pH of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium. Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_sex": { "$ref": "#/$defs/PLANTSEXENUM", "description": "Sex of the reproductive parts on the whole plant, e.g. pistillate, staminate, monoecious, hermaphrodite" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. 
petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "radiation_regm": { "description": "Information about treatment involving exposure of plant or a plant part to a particular radiation regimen; should include the radiation type, amount or intensity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple radiation regimens", "items": { "type": "string" }, "type": "array" }, "rainfall_regm": { "description": "Information about treatment involving an exposure to a given amount of rainfall, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": 
"string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N,P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "root_med_regl": { "description": "Growth regulators in the culture rooting medium such as cytokinins, auxins, gybberellins, abscisic acid; e.g. 0.5 mg/L NAA", "type": "string" }, "root_med_solid": { "description": "Specification of the solidifying agent in the culture rooting medium; e.g. agar", "type": "string" }, "root_med_suppl": { "description": "Organic supplements of the culture rooting medium, such as vitamins, amino acids, organic acids, antibiotics activated charcoal; e.g. nicotinic acid (0.5 mg/L)", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. 
inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. 
May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "seq_meth": { "description": "Sequencing machine used. 
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tiss_cult_growth_med": { "description": "Description of plant tissue culture growth media used", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", 
"type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type" ], "title": "MiuvigPlantAssociated", "type": "object" }, "MiuvigSediment": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the Sediment Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. 
High-quality draft genome: One or multiple fragments, totaling >= 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totaling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y.
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. 
all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483).
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUS defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "particle_class": { "description": "Particles are classified, based on their size, into six general categories:clay, silt, sand, gravel, cobbles, and boulders; should include amount of particle preceded by the name of the particle type; can include multiple values", "items": { "type": "string" }, "type": "array" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject to, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. 
These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "sediment_type": { "$ref": "#/$defs/SEDIMENTTYPEENUM", "description": "Information about the sediment type based on major constituents" }, "seq_meth": { "description": "Sequencing machine used.
Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type" ], "title": "MiuvigSediment", "type": "object" }, "MiuvigSoil": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the Soil Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "al_sat": { "description": "Aluminum saturation (esp. For tropical soils)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "al_sat_meth": { "description": "Reference or method used in determining Al saturation", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. 
It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. 
The target organism is the subject of the relationship, and the other organism(s) is the object" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. 
checkm, anvi'o, busco", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. 
The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. 
Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "heavy_metals": { "description": "Heavy metals present in the sequenced sample and their concentrations. 
For multiple heavy metals and concentrations, add multiple copies of this field", "items": { "type": "string" }, "type": "array" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "link_addit_analys": { "description": "Link to additional analysis results performed on the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the 
genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction 
from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUS defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. 
sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of 
the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. 
INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "water_cont_soil_meth": { "description": "Reference or method used in determining the water content of soil", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "elev", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type" ], "title": "MiuvigSoil", "type": "object" }, "MiuvigSymbiontAssociated": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the SymbiontAssociated Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "association_duration": { "description": "Time spent in host of the symbiotic organism at the time of sampling; relevant scale depends on symbiotic organism and study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling.
We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_cellular_loc": { "$ref": "#/$defs/HOSTCELLULARLOCENUM", "description": "The localization of the symbiotic host organism within the host from which it was sampled: e.g. 
intracellular if the symbiotic host organism is localized within the cells or extracellular if the symbiotic host organism is localized outside of cells" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_dependence": { "$ref": "#/$defs/HOSTDEPENDENCEENUM", "description": "Type of host dependence for the symbiotic host organism to its host" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_infra_spec_name": { "description": "Taxonomic information about the host below subspecies level", "type": "string" }, "host_infra_spec_rank": { "description": "Taxonomic rank information about the host below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_number": { "description": "Number of symbiotic host individuals pooled at the time of collection", "type": "string" }, "host_of_host_coinf": { "description": "The taxonomic name of any coinfecting organism observed in a symbiotic relationship with the host of the sampled host organism. e.g. where a sample collected from a host trematode species (A) which was collected from a host_of_host fish (B) that was also infected with a nematode (C), the value here would be (C) the nematode {species name} or {common name}. Multiple co-infecting species may be added in a comma-separated list. For listing symbiotic organisms associated with the host (A) use the term Observed host symbiont", "type": "string" }, "host_of_host_disease": { "description": "List of diseases with which the host of the symbiotic host organism has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_loc": { "description": "For a symbiotic host organism the local anatomical environment within its host may have causal influences. 
Report the anatomical entity(s) which are in the direct environment of the symbiotic host organism being sampled and which you believe have significant causal influences on your sample or specimen. For example, if the symbiotic host organism being sampled is an intestinal worm, its local environmental context will be the term for intestine from UBERON (http://uberon.github.io/)", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_med": { "description": "Report the environmental material(s) immediately surrounding the symbiotic host organism at the time of sampling. This usually will be a tissue or substance type from the host, but may be another material if the symbiont is external to the host. We recommend using classes from the UBERON ontology, but subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483) may also be used. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g.
intestines, heart)", "type": "string" }, "host_of_host_fam_rel": { "description": "Familial relationship of the host of the symbiotic host organisms to other hosts of symbiotic host organism in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_of_host_geno": { "description": "Observed genotype of the host of the symbiotic host organism", "type": "string" }, "host_of_host_gravid": { "description": "Whether or not the host of the symbiotic host organism is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_of_host_infname": { "description": "Taxonomic name information of the host of the symbiotic host organism below subspecies level", "type": "string" }, "host_of_host_infrank": { "description": "Taxonomic rank information about the host of the symbiotic host organism below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_of_host_name": { "description": "Common name of the host of the symbiotic host organism", "type": "string" }, "host_of_host_pheno": { "description": "Phenotype of the host of the symbiotic host organism. For phenotypic quality ontology (PATO) terms, see http://purl.bioontology.org/ontology/pato", "type": "string" }, "host_of_host_sub_id": { "description": "A unique identifier by which each host of the symbiotic host organism subject can be referred to, de-identified, e.g. #H14", "type": "string" }, "host_of_host_taxid": { "description": "NCBI taxon id of the host of the symbiotic host organism", "type": "string" }, "host_of_host_totmass": { "description": "Total mass of the host of the symbiotic host organism at collection, the unit depends on the host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "host_specificity": { "$ref": "#/$defs/HOSTSPECIFICITYENUM", "description": "Level of specificity of symbiont-host interaction: e.g. generalist (symbiont able to establish associations with distantly related hosts) or species-specific" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. 
Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "mode_transmission": { "$ref": "#/$defs/MODETRANSMISSIONENUM", "description": "The process through which the symbiotic host organism entered the host from which it was sampled" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUS defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "route_transmission": { "$ref": "#/$defs/ROUTETRANSMISSIONENUM", "description": "Description of path taken by the symbiotic host organism being sampled in order to establish a symbiotic relationship with the host (with which it was observed at the time of sampling) via a mode of transmission (specified in mode_transmission)" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. 
While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique.
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_sol": { "description": "Solution within which sample was stored, if any", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. 
For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "sym_life_cycle_type": { "$ref": "#/$defs/SYMLIFECYCLETYPEENUM", "description": "Type of life cycle of the symbiotic host species (the thing being sampled). Simple life cycles occur within a single host, complex ones within multiple different hosts over the course of their normal life cycle" }, "symbiont_host_role": { "$ref": "#/$defs/SYMBIONTHOSTROLEENUM", "description": "Role of the host in the life cycle of the symbiotic organism" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "type_of_symbiosis": { "$ref": "#/$defs/TYPEOFSYMBIOSISENUM", "description": "Type of biological interaction established between the symbiotic host organism being sampled and its respective host" }, "urobiom_sex": { "$ref": "#/$defs/UROBIOMSEXENUM", "description": "Physical sex of the host" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type", "host_dependence", "sym_life_cycle_type", "host_life_stage" ], "title": "MiuvigSymbiontAssociated", "type": "object" }, "MiuvigWastewaterSludge": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the WastewaterSludge Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both 
amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. 
Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biochem_oxygen_dem": { "description": "Amount of dissolved oxygen needed by aerobic biological organisms in a body of water to break down organic material present in a given water sample at certain temperature over a specific time period", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_oxygen_dem": { "description": "A measure of the capacity of water to consume oxygen during the decomposition of organic matter and the oxidation of inorganic chemicals such as ammonia and nitrite", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. 
For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "efficiency_percent": { "description": "Percentage of volatile solids removed from the anaerobic digestor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "emulsions": { "description": "Amount or concentration of substances such as paints, adhesives, mayonnaise, hair colorants, emulsified oils, etc.; can include multiple emulsion types", "items": { "type": "string" }, "type": "array" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. 
air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gaseous_substances": { "description": "Amount or concentration of substances such as hydrogen sulfide, carbon dioxide, methane, etc.; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. 
The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "indust_eff_percent": { "description": "Percentage of industrial effluents received by wastewater treatment plant", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "inorg_particles": { "description": "Concentration of particles such as sand, grit, metal particles, ceramics, etc.; can include multiple particles", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_particles": { "description": "Concentration of particles such as faeces, hairs, food, vomit, paper fibers, plant material, humus, etc", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUs defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. 
sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "Ph measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "pre_treatment": { "description": "The process of pre-treatment removes materials that can be easily collected from the raw wastewater", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "primary_treatment": { "description": "The process to produce both a generally homogeneous liquid capable of being treated biologically and a sludge that can be separately treated or processed", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reactor_type": { "description": "Anaerobic digesters can be designed and engineered to operate using a number of different process configurations, as batch or continuous, mesophilic, high solid or low solid, and single stage or multistage", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. 
While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2)) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "secondary_treatment": { "description": "The process for substantially degrading the biological content of the sewage", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "sewage_type": { "description": "Type of wastewater treatment plant as municipal or industrial", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "sludge_retent_time": { "description": "The time activated sludge remains in reactor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_inorg_mat": { "description": "Concentration of substances such as ammonia, road-salt, sea-salt, cyanide, hydrogen sulfide, thiocyanates, thiosulfates, etc", "items": { "type": "string" }, "type": "array" }, "soluble_org_mat": { "description": "Concentration of substances such as urea, fruit sugars, soluble proteins, drugs, pharmaceuticals, etc", "items": { "type": "string" }, "type": "array" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tertiary_treatment": { "description": "The process providing a final treatment stage to raise the effluent quality before it is discharged to the receiving environment", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "wastewater_type": { "description": "The origin of wastewater such as human waste, rainfall, storm drains, etc", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type" ], "title": "MiuvigWastewaterSludge", "type": "object" }, "MiuvigWater": { "additionalProperties": false, "description": "MIxS data that complies with the Miuvig checklist and the Water Extension", "properties": { "adapters": { "description": "Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. 
Both adapters should be reported; in uppercase letters", "type": "string" }, "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annot": { "description": "Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter", "type": "string" }, "assembly_name": { "description": "Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community", "type": "string" }, "assembly_qual": { "description": "The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling 90% of the expected genome or replicon sequence or predicted complete. 
Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated", "type": "string" }, "assembly_software": { "description": "Tool(s) used for assembly, including version number and parameters", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "associated_resource": { "description": "A related resource that is referenced, cited, or otherwise associated to the sequence", "items": { "type": "string" }, "type": "array" }, "atmospheric_data": { "description": "Measurement of atmospheric data; can include multiple data", "items": { "type": "string" }, "type": "array" }, "bac_prod": { "description": "Bacterial production in the water column measured by isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bac_resp": { "description": "Measurement of bacterial respiration in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bin_param": { "$ref": "#/$defs/BINPARAMENUM", "description": "The parameters that have been applied during the extraction of genomes from metagenomic datasets" }, "bin_software": { "description": "Tool(s) used for the extraction of genomes from metagenomic datasets, where possible include a product ID (PID) of the tool(s) used", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. 
Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "biotic_relationship": { "$ref": "#/$defs/BIOTICRELATIONSHIPENUM", "description": "Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "compl_appr": { "$ref": "#/$defs/COMPLAPPRENUM", "description": "The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome" }, "compl_score": { "description": "Completeness score is typically based on either the fraction of markers found as compared to a database or the percent of a genome found as compared to a closely related reference genome. High Quality Draft: >90%, Medium Quality Draft: >50%, and Low Quality Draft: < 50% should have the indicated completeness scores", "type": "string" }, "compl_software": { "description": "Tools used for completion estimate, i.e. checkm, anvi'o, busco", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "detec_type": { "description": "Type of UViG detection", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_nitro": { "description": "Concentration of dissolved inorganic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "down_par": { "description": "Visible waveband radiance and irradiance measurements in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "env_broad_scale": { "description": "Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO's biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "env_local_scale": { "description": "Report the entity or entities which are in the sample or specimen's local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS", "type": "string" }, "env_medium": { "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "estimated_size": { "description": "The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period", "type": "string" }, "experimental_factor": { "description": "Variable aspects of an experiment design that can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI)", "items": { "pattern": "^\\S+.*\\S+ \\[[a-zA-Z]{2,}:\\d+\\]$", "type": "string" }, "type": "array" }, "feat_pred": { "description": "Method used to predict UViGs features such as ORFs, integration site, etc", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fluor": { "description": "Raw or converted fluorescence of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_pred_appr": { "$ref": "#/$defs/HOSTPREDAPPRENUM", "description": "Tool or approach used for host prediction" }, "host_pred_est_acc": { "description": "For each tool or approach used for host prediction, estimated false discovery rates should be included, either computed de novo or from the literature", "type": "string" }, "host_spec_range": { "description": "The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier", "items": { "type": "string" }, "type": "array" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "lib_layout": { "$ref": "#/$defs/LIBLAYOUTENUM", "description": "Specify whether to expect single, paired, or other configuration of reads" }, "lib_reads_seqd": { "description": "Total number of clones sequenced from the library", "type": "integer" }, "lib_screen": { "description": "Specific enrichment or screening methods applied before and/or after creating libraries", "type": "string" }, "lib_size": { "description": "Total number of clones in the library prepared for the project", "type": "integer" }, "lib_vector": { "description": "Cloning vector type(s) used in construction of libraries", "type": "string" }, "light_intensity": { "description": "Measurement of light intensity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mag_cov_software": { "$ref": "#/$defs/MAGCOVSOFTWAREENUM", "description": "Tool(s) used to determine the genome coverage if coverage is used as a binning parameter in the extraction of genomes from metagenomic datasets" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mid": { "description": "Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters", "pattern": "^[ACGTRKSYMWBHDVN]+$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "neg_cont_type": { "$ref": "#/$defs/NEGCONTTYPEENUM", "description": "The substance or equipment used as a negative control in an investigation" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nucl_acid_amp": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the enzymatic amplification (PCR, TMA, NASBA) of specific nucleic acids", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "nucl_acid_ext": { "description": "A link to a literature reference, electronic resource or a standard operating procedure (SOP), that describes the material separation to recover the nucleic acid fraction from a sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "number_contig": { "description": "Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG", "type": "integer" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "otu_class_appr": { "description": "Cutoffs and approach used when clustering species-level OTUs. Note that results from standard 95% ANI / 85% AF clustering should be provided alongside OTUs defined from another set of thresholds, even if the latter are the ones primarily used during the analysis", "type": "string" }, "otu_db": { "description": "Reference database (i.e. sequences not generated as part of the current study) used to cluster new genomes in \"species-level\" OTUs, if any", "type": "string" }, "otu_seq_comp_appr": { "description": "Tool and thresholds used to compare sequences when computing \"species-level\" OTUs", "type": "string" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "part_org_nitro": { "description": "Concentration of particulate organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pathogenicity": { "description": "To what is the entity pathogenic", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "photon_flux": { "description": "Measurement of photon flux", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pos_cont_type": { "description": "The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pred_genome_struc": { "$ref": "#/$defs/PREDGENOMESTRUCENUM", "description": "Expected structure of the viral genome" }, "pred_genome_type": { "description": "Type of genome predicted for the UViG", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "primary_prod": { "description": "Measurement of primary production, generally measured as isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reassembly_bin": { "description": "Has an assembly been performed on a genome bin extracted from a metagenomic assembly?", "type": "boolean" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ref_biomaterial": { "description": "Primary publication if isolated before genome publication; otherwise, primary genome report", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "ref_db": { "description": "List of database(s) used for ORF annotation, along with version number and reference to website or publication", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. 
While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_collect_device": { "description": "The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094)", "type": "string" }, "samp_collect_method": { "description": "The method employed for collecting the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_mat_process": { "description": "A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_size": { "description": "The total amount or size (volume (ml), mass (g) or area (m2) ) of sample collected", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_taxon_id": { "description": "NCBI taxon id of the sample. May be a single taxon or mixed taxa sample. Use 'synthetic metagenome' for mock community/positive controls, or 'blank sample' for negative controls", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[NCBITaxon:\\d+\\]$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sc_lysis_approach": { "$ref": "#/$defs/SCLYSISAPPROACHENUM", "description": "Method used to free DNA from interior of the cell(s) or particle(s)" }, "sc_lysis_method": { "description": "Name of the kit or standard protocol used for cell(s) or particle(s) lysis", "type": "string" }, "seq_meth": { "description": "Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)|(([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\])$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sim_search_meth": { "description": "Tool used to compare ORFs with database, along with version and cutoffs used", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac": { "description": "Filtering pore size used in sample preparation", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_react_phosp": { "description": "Concentration of soluble reactive phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sop": { "description": "Standard operating procedures used in assembly and/or annotation of genomes, metagenomes or environmental sequences", "items": { "type": "string" }, "type": "array" }, "sort_tech": { "$ref": "#/$defs/SORTTECHENUM", "description": "Method used to sort/isolate cells or particles of interest" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14', referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. 
xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "source_uvig": { "description": "Type of dataset from which the UViG was obtained", "type": "string" }, "specific_host": { "description": "Report the host's taxonomic name and/or NCBI taxonomy ID", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_part_matter": { "description": "Concentration of suspended particulate matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tax_class": { "description": "Method used for taxonomic classification, along with reference database used, classification rank, and thresholds used to classify new genomes", "type": "string" }, "tax_ident": { "$ref": "#/$defs/TAXIDENTENUM", "description": "The phylogenetic marker(s) used to assign an organism name to the SAG or MAG" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_diss_nitro": { "description": "Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_inorg_nitro": { "description": "Total inorganic nitrogen content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_part_carb": { "description": "Total particulate carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trna_ext_software": { "description": "Tools used for tRNA identification", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+);([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "trnas": { "description": "The total number of tRNAs identified from the SAG or MAG", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "vir_ident_software": { "description": "Tool(s) used for the identification of UViG as a viral genome, software or protocol name including version number, parameters, and cutoffs used", "type": "string" }, "virus_enrich_appr": { "$ref": "#/$defs/VIRUSENRICHAPPRENUM", "description": "List of approaches used to enrich the sample for viruses, if any" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wga_amp_appr": { "$ref": "#/$defs/WGAAMPAPPRENUM", "description": "Method used to amplify genomic DNA in preparation for sequencing" }, "wga_amp_kit": { "description": "Kit used to amplify genomic DNA in preparation for sequencing", "type": "string" } }, "required": [ "samp_name", "source_uvig", "number_contig", "assembly_qual", "project_name", "virus_enrich_appr", "assembly_software", "env_local_scale", "depth", "env_medium", "samp_taxon_id", "geo_loc_name", "collection_date", "seq_meth", "lat_lon", "env_broad_scale", "vir_ident_software", "pred_genome_type", "pred_genome_struc", "detec_type" ], "title": "MiuvigWater", "type": "object" }, "MixsCompliantData": { "additionalProperties": false, "description": "A collection of data that complies with some combination of a MIxS checklist and environmental extension", "properties": { "agriculture_data": { "description": "Data that complies with Extension Agriculture", "items": { "$ref": "#/$defs/Agriculture" }, "type": "array" }, "air_data": { "description": "Data that complies with Extension Air", "items": { "$ref": "#/$defs/Air" }, "type": "array" }, "built_environment_data": { "description": "Data that complies with Extension BuiltEnvironment", "items": { "$ref": "#/$defs/BuiltEnvironment" }, "type": "array" }, "food_animal_and_animal_feed_data": { "description": "Data that complies with Extension FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/FoodAnimalAndAnimalFeed" }, "type": "array" }, "food_farm_environment_data": { "description": "Data that complies with Extension FoodFarmEnvironment", "items": { "$ref": "#/$defs/FoodFarmEnvironment" }, "type": "array" }, "food_food_production_facility_data": { "description": "Data that complies with Extension FoodFoodProductionFacility", "items": { "$ref": "#/$defs/FoodFoodProductionFacility" }, "type": "array" }, "food_human_foods_data": { "description": "Data that complies with Extension FoodHumanFoods", 
"items": { "$ref": "#/$defs/FoodHumanFoods" }, "type": "array" }, "host_associated_data": { "description": "Data that complies with Extension HostAssociated", "items": { "$ref": "#/$defs/HostAssociated" }, "type": "array" }, "human_associated_data": { "description": "Data that complies with Extension HumanAssociated", "items": { "$ref": "#/$defs/HumanAssociated" }, "type": "array" }, "human_gut_data": { "description": "Data that complies with Extension HumanGut", "items": { "$ref": "#/$defs/HumanGut" }, "type": "array" }, "human_oral_data": { "description": "Data that complies with Extension HumanOral", "items": { "$ref": "#/$defs/HumanOral" }, "type": "array" }, "human_skin_data": { "description": "Data that complies with Extension HumanSkin", "items": { "$ref": "#/$defs/HumanSkin" }, "type": "array" }, "human_vaginal_data": { "description": "Data that complies with Extension HumanVaginal", "items": { "$ref": "#/$defs/HumanVaginal" }, "type": "array" }, "hydrocarbon_resources_cores_data": { "description": "Data that complies with Extension HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/HydrocarbonResourcesCores" }, "type": "array" }, "hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with Extension HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/HydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "microbial_mat_biofilm_data": { "description": "Data that complies with Extension MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MicrobialMatBiofilm" }, "type": "array" }, "migs_ba_agriculture_data": { "description": "Data that complies with MigsBa combined with Agriculture", "items": { "$ref": "#/$defs/MigsBaAgriculture" }, "type": "array" }, "migs_ba_air_data": { "description": "Data that complies with MigsBa combined with Air", "items": { "$ref": "#/$defs/MigsBaAir" }, "type": "array" }, "migs_ba_built_environment_data": { "description": "Data that complies with MigsBa combined with BuiltEnvironment", "items": { 
"$ref": "#/$defs/MigsBaBuiltEnvironment" }, "type": "array" }, "migs_ba_data": { "description": "Data that complies with checklist MigsBa", "items": { "$ref": "#/$defs/MigsBa" }, "type": "array" }, "migs_ba_food_animal_and_animal_feed_data": { "description": "Data that complies with MigsBa combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MigsBaFoodAnimalAndAnimalFeed" }, "type": "array" }, "migs_ba_food_farm_environment_data": { "description": "Data that complies with MigsBa combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MigsBaFoodFarmEnvironment" }, "type": "array" }, "migs_ba_food_food_production_facility_data": { "description": "Data that complies with MigsBa combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MigsBaFoodFoodProductionFacility" }, "type": "array" }, "migs_ba_food_human_foods_data": { "description": "Data that complies with MigsBa combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MigsBaFoodHumanFoods" }, "type": "array" }, "migs_ba_host_associated_data": { "description": "Data that complies with MigsBa combined with HostAssociated", "items": { "$ref": "#/$defs/MigsBaHostAssociated" }, "type": "array" }, "migs_ba_human_associated_data": { "description": "Data that complies with MigsBa combined with HumanAssociated", "items": { "$ref": "#/$defs/MigsBaHumanAssociated" }, "type": "array" }, "migs_ba_human_gut_data": { "description": "Data that complies with MigsBa combined with HumanGut", "items": { "$ref": "#/$defs/MigsBaHumanGut" }, "type": "array" }, "migs_ba_human_oral_data": { "description": "Data that complies with MigsBa combined with HumanOral", "items": { "$ref": "#/$defs/MigsBaHumanOral" }, "type": "array" }, "migs_ba_human_skin_data": { "description": "Data that complies with MigsBa combined with HumanSkin", "items": { "$ref": "#/$defs/MigsBaHumanSkin" }, "type": "array" }, "migs_ba_human_vaginal_data": { "description": "Data that complies with MigsBa combined with 
HumanVaginal", "items": { "$ref": "#/$defs/MigsBaHumanVaginal" }, "type": "array" }, "migs_ba_hydrocarbon_resources_cores_data": { "description": "Data that complies with MigsBa combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MigsBaHydrocarbonResourcesCores" }, "type": "array" }, "migs_ba_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with MigsBa combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MigsBaHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "migs_ba_microbial_mat_biofilm_data": { "description": "Data that complies with MigsBa combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MigsBaMicrobialMatBiofilm" }, "type": "array" }, "migs_ba_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with MigsBa combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MigsBaMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "migs_ba_plant_associated_data": { "description": "Data that complies with MigsBa combined with PlantAssociated", "items": { "$ref": "#/$defs/MigsBaPlantAssociated" }, "type": "array" }, "migs_ba_sediment_data": { "description": "Data that complies with MigsBa combined with Sediment", "items": { "$ref": "#/$defs/MigsBaSediment" }, "type": "array" }, "migs_ba_soil_data": { "description": "Data that complies with MigsBa combined with Soil", "items": { "$ref": "#/$defs/MigsBaSoil" }, "type": "array" }, "migs_ba_symbiont_associated_data": { "description": "Data that complies with MigsBa combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MigsBaSymbiontAssociated" }, "type": "array" }, "migs_ba_wastewater_sludge_data": { "description": "Data that complies with MigsBa combined with WastewaterSludge", "items": { "$ref": "#/$defs/MigsBaWastewaterSludge" }, "type": "array" }, "migs_ba_water_data": { "description": "Data that complies with MigsBa combined with Water", 
"items": { "$ref": "#/$defs/MigsBaWater" }, "type": "array" }, "migs_eu_agriculture_data": { "description": "Data that complies with MigsEu combined with Agriculture", "items": { "$ref": "#/$defs/MigsEuAgriculture" }, "type": "array" }, "migs_eu_air_data": { "description": "Data that complies with MigsEu combined with Air", "items": { "$ref": "#/$defs/MigsEuAir" }, "type": "array" }, "migs_eu_built_environment_data": { "description": "Data that complies with MigsEu combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MigsEuBuiltEnvironment" }, "type": "array" }, "migs_eu_data": { "description": "Data that complies with checklist MigsEu", "items": { "$ref": "#/$defs/MigsEu" }, "type": "array" }, "migs_eu_food_animal_and_animal_feed_data": { "description": "Data that complies with MigsEu combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MigsEuFoodAnimalAndAnimalFeed" }, "type": "array" }, "migs_eu_food_farm_environment_data": { "description": "Data that complies with MigsEu combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MigsEuFoodFarmEnvironment" }, "type": "array" }, "migs_eu_food_food_production_facility_data": { "description": "Data that complies with MigsEu combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MigsEuFoodFoodProductionFacility" }, "type": "array" }, "migs_eu_food_human_foods_data": { "description": "Data that complies with MigsEu combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MigsEuFoodHumanFoods" }, "type": "array" }, "migs_eu_host_associated_data": { "description": "Data that complies with MigsEu combined with HostAssociated", "items": { "$ref": "#/$defs/MigsEuHostAssociated" }, "type": "array" }, "migs_eu_human_associated_data": { "description": "Data that complies with MigsEu combined with HumanAssociated", "items": { "$ref": "#/$defs/MigsEuHumanAssociated" }, "type": "array" }, "migs_eu_human_gut_data": { "description": "Data that complies with MigsEu combined with 
HumanGut", "items": { "$ref": "#/$defs/MigsEuHumanGut" }, "type": "array" }, "migs_eu_human_oral_data": { "description": "Data that complies with MigsEu combined with HumanOral", "items": { "$ref": "#/$defs/MigsEuHumanOral" }, "type": "array" }, "migs_eu_human_skin_data": { "description": "Data that complies with MigsEu combined with HumanSkin", "items": { "$ref": "#/$defs/MigsEuHumanSkin" }, "type": "array" }, "migs_eu_human_vaginal_data": { "description": "Data that complies with MigsEu combined with HumanVaginal", "items": { "$ref": "#/$defs/MigsEuHumanVaginal" }, "type": "array" }, "migs_eu_hydrocarbon_resources_cores_data": { "description": "Data that complies with MigsEu combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MigsEuHydrocarbonResourcesCores" }, "type": "array" }, "migs_eu_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with MigsEu combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MigsEuHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "migs_eu_microbial_mat_biofilm_data": { "description": "Data that complies with MigsEu combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MigsEuMicrobialMatBiofilm" }, "type": "array" }, "migs_eu_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with MigsEu combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MigsEuMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "migs_eu_plant_associated_data": { "description": "Data that complies with MigsEu combined with PlantAssociated", "items": { "$ref": "#/$defs/MigsEuPlantAssociated" }, "type": "array" }, "migs_eu_sediment_data": { "description": "Data that complies with MigsEu combined with Sediment", "items": { "$ref": "#/$defs/MigsEuSediment" }, "type": "array" }, "migs_eu_soil_data": { "description": "Data that complies with MigsEu combined with Soil", "items": { "$ref": 
"#/$defs/MigsEuSoil" }, "type": "array" }, "migs_eu_symbiont_associated_data": { "description": "Data that complies with MigsEu combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MigsEuSymbiontAssociated" }, "type": "array" }, "migs_eu_wastewater_sludge_data": { "description": "Data that complies with MigsEu combined with WastewaterSludge", "items": { "$ref": "#/$defs/MigsEuWastewaterSludge" }, "type": "array" }, "migs_eu_water_data": { "description": "Data that complies with MigsEu combined with Water", "items": { "$ref": "#/$defs/MigsEuWater" }, "type": "array" }, "migs_org_agriculture_data": { "description": "Data that complies with MigsOrg combined with Agriculture", "items": { "$ref": "#/$defs/MigsOrgAgriculture" }, "type": "array" }, "migs_org_air_data": { "description": "Data that complies with MigsOrg combined with Air", "items": { "$ref": "#/$defs/MigsOrgAir" }, "type": "array" }, "migs_org_built_environment_data": { "description": "Data that complies with MigsOrg combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MigsOrgBuiltEnvironment" }, "type": "array" }, "migs_org_data": { "description": "Data that complies with checklist MigsOrg", "items": { "$ref": "#/$defs/MigsOrg" }, "type": "array" }, "migs_org_food_animal_and_animal_feed_data": { "description": "Data that complies with MigsOrg combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MigsOrgFoodAnimalAndAnimalFeed" }, "type": "array" }, "migs_org_food_farm_environment_data": { "description": "Data that complies with MigsOrg combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MigsOrgFoodFarmEnvironment" }, "type": "array" }, "migs_org_food_food_production_facility_data": { "description": "Data that complies with MigsOrg combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MigsOrgFoodFoodProductionFacility" }, "type": "array" }, "migs_org_food_human_foods_data": { "description": "Data that complies with MigsOrg combined with 
FoodHumanFoods", "items": { "$ref": "#/$defs/MigsOrgFoodHumanFoods" }, "type": "array" }, "migs_org_host_associated_data": { "description": "Data that complies with MigsOrg combined with HostAssociated", "items": { "$ref": "#/$defs/MigsOrgHostAssociated" }, "type": "array" }, "migs_org_human_associated_data": { "description": "Data that complies with MigsOrg combined with HumanAssociated", "items": { "$ref": "#/$defs/MigsOrgHumanAssociated" }, "type": "array" }, "migs_org_human_gut_data": { "description": "Data that complies with MigsOrg combined with HumanGut", "items": { "$ref": "#/$defs/MigsOrgHumanGut" }, "type": "array" }, "migs_org_human_oral_data": { "description": "Data that complies with MigsOrg combined with HumanOral", "items": { "$ref": "#/$defs/MigsOrgHumanOral" }, "type": "array" }, "migs_org_human_skin_data": { "description": "Data that complies with MigsOrg combined with HumanSkin", "items": { "$ref": "#/$defs/MigsOrgHumanSkin" }, "type": "array" }, "migs_org_human_vaginal_data": { "description": "Data that complies with MigsOrg combined with HumanVaginal", "items": { "$ref": "#/$defs/MigsOrgHumanVaginal" }, "type": "array" }, "migs_org_hydrocarbon_resources_cores_data": { "description": "Data that complies with MigsOrg combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MigsOrgHydrocarbonResourcesCores" }, "type": "array" }, "migs_org_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with MigsOrg combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MigsOrgHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "migs_org_microbial_mat_biofilm_data": { "description": "Data that complies with MigsOrg combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MigsOrgMicrobialMatBiofilm" }, "type": "array" }, "migs_org_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with MigsOrg combined with MiscellaneousNaturalOrArtificialEnvironment", 
"items": { "$ref": "#/$defs/MigsOrgMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "migs_org_plant_associated_data": { "description": "Data that complies with MigsOrg combined with PlantAssociated", "items": { "$ref": "#/$defs/MigsOrgPlantAssociated" }, "type": "array" }, "migs_org_sediment_data": { "description": "Data that complies with MigsOrg combined with Sediment", "items": { "$ref": "#/$defs/MigsOrgSediment" }, "type": "array" }, "migs_org_soil_data": { "description": "Data that complies with MigsOrg combined with Soil", "items": { "$ref": "#/$defs/MigsOrgSoil" }, "type": "array" }, "migs_org_symbiont_associated_data": { "description": "Data that complies with MigsOrg combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MigsOrgSymbiontAssociated" }, "type": "array" }, "migs_org_wastewater_sludge_data": { "description": "Data that complies with MigsOrg combined with WastewaterSludge", "items": { "$ref": "#/$defs/MigsOrgWastewaterSludge" }, "type": "array" }, "migs_org_water_data": { "description": "Data that complies with MigsOrg combined with Water", "items": { "$ref": "#/$defs/MigsOrgWater" }, "type": "array" }, "migs_pl_agriculture_data": { "description": "Data that complies with MigsPl combined with Agriculture", "items": { "$ref": "#/$defs/MigsPlAgriculture" }, "type": "array" }, "migs_pl_air_data": { "description": "Data that complies with MigsPl combined with Air", "items": { "$ref": "#/$defs/MigsPlAir" }, "type": "array" }, "migs_pl_built_environment_data": { "description": "Data that complies with MigsPl combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MigsPlBuiltEnvironment" }, "type": "array" }, "migs_pl_data": { "description": "Data that complies with checklist MigsPl", "items": { "$ref": "#/$defs/MigsPl" }, "type": "array" }, "migs_pl_food_animal_and_animal_feed_data": { "description": "Data that complies with MigsPl combined with FoodAnimalAndAnimalFeed", "items": { "$ref": 
"#/$defs/MigsPlFoodAnimalAndAnimalFeed" }, "type": "array" }, "migs_pl_food_farm_environment_data": { "description": "Data that complies with MigsPl combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MigsPlFoodFarmEnvironment" }, "type": "array" }, "migs_pl_food_food_production_facility_data": { "description": "Data that complies with MigsPl combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MigsPlFoodFoodProductionFacility" }, "type": "array" }, "migs_pl_food_human_foods_data": { "description": "Data that complies with MigsPl combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MigsPlFoodHumanFoods" }, "type": "array" }, "migs_pl_host_associated_data": { "description": "Data that complies with MigsPl combined with HostAssociated", "items": { "$ref": "#/$defs/MigsPlHostAssociated" }, "type": "array" }, "migs_pl_human_associated_data": { "description": "Data that complies with MigsPl combined with HumanAssociated", "items": { "$ref": "#/$defs/MigsPlHumanAssociated" }, "type": "array" }, "migs_pl_human_gut_data": { "description": "Data that complies with MigsPl combined with HumanGut", "items": { "$ref": "#/$defs/MigsPlHumanGut" }, "type": "array" }, "migs_pl_human_oral_data": { "description": "Data that complies with MigsPl combined with HumanOral", "items": { "$ref": "#/$defs/MigsPlHumanOral" }, "type": "array" }, "migs_pl_human_skin_data": { "description": "Data that complies with MigsPl combined with HumanSkin", "items": { "$ref": "#/$defs/MigsPlHumanSkin" }, "type": "array" }, "migs_pl_human_vaginal_data": { "description": "Data that complies with MigsPl combined with HumanVaginal", "items": { "$ref": "#/$defs/MigsPlHumanVaginal" }, "type": "array" }, "migs_pl_hydrocarbon_resources_cores_data": { "description": "Data that complies with MigsPl combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MigsPlHydrocarbonResourcesCores" }, "type": "array" }, "migs_pl_hydrocarbon_resources_fluids_swabs_data": { 
"description": "Data that complies with MigsPl combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MigsPlHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "migs_pl_microbial_mat_biofilm_data": { "description": "Data that complies with MigsPl combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MigsPlMicrobialMatBiofilm" }, "type": "array" }, "migs_pl_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with MigsPl combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MigsPlMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "migs_pl_plant_associated_data": { "description": "Data that complies with MigsPl combined with PlantAssociated", "items": { "$ref": "#/$defs/MigsPlPlantAssociated" }, "type": "array" }, "migs_pl_sediment_data": { "description": "Data that complies with MigsPl combined with Sediment", "items": { "$ref": "#/$defs/MigsPlSediment" }, "type": "array" }, "migs_pl_soil_data": { "description": "Data that complies with MigsPl combined with Soil", "items": { "$ref": "#/$defs/MigsPlSoil" }, "type": "array" }, "migs_pl_symbiont_associated_data": { "description": "Data that complies with MigsPl combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MigsPlSymbiontAssociated" }, "type": "array" }, "migs_pl_wastewater_sludge_data": { "description": "Data that complies with MigsPl combined with WastewaterSludge", "items": { "$ref": "#/$defs/MigsPlWastewaterSludge" }, "type": "array" }, "migs_pl_water_data": { "description": "Data that complies with MigsPl combined with Water", "items": { "$ref": "#/$defs/MigsPlWater" }, "type": "array" }, "migs_vi_agriculture_data": { "description": "Data that complies with MigsVi combined with Agriculture", "items": { "$ref": "#/$defs/MigsViAgriculture" }, "type": "array" }, "migs_vi_air_data": { "description": "Data that complies with MigsVi combined with Air", "items": { "$ref": 
"#/$defs/MigsViAir" }, "type": "array" }, "migs_vi_built_environment_data": { "description": "Data that complies with MigsVi combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MigsViBuiltEnvironment" }, "type": "array" }, "migs_vi_data": { "description": "Data that complies with checklist MigsVi", "items": { "$ref": "#/$defs/MigsVi" }, "type": "array" }, "migs_vi_food_animal_and_animal_feed_data": { "description": "Data that complies with MigsVi combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MigsViFoodAnimalAndAnimalFeed" }, "type": "array" }, "migs_vi_food_farm_environment_data": { "description": "Data that complies with MigsVi combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MigsViFoodFarmEnvironment" }, "type": "array" }, "migs_vi_food_food_production_facility_data": { "description": "Data that complies with MigsVi combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MigsViFoodFoodProductionFacility" }, "type": "array" }, "migs_vi_food_human_foods_data": { "description": "Data that complies with MigsVi combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MigsViFoodHumanFoods" }, "type": "array" }, "migs_vi_host_associated_data": { "description": "Data that complies with MigsVi combined with HostAssociated", "items": { "$ref": "#/$defs/MigsViHostAssociated" }, "type": "array" }, "migs_vi_human_associated_data": { "description": "Data that complies with MigsVi combined with HumanAssociated", "items": { "$ref": "#/$defs/MigsViHumanAssociated" }, "type": "array" }, "migs_vi_human_gut_data": { "description": "Data that complies with MigsVi combined with HumanGut", "items": { "$ref": "#/$defs/MigsViHumanGut" }, "type": "array" }, "migs_vi_human_oral_data": { "description": "Data that complies with MigsVi combined with HumanOral", "items": { "$ref": "#/$defs/MigsViHumanOral" }, "type": "array" }, "migs_vi_human_skin_data": { "description": "Data that complies with MigsVi combined with HumanSkin", 
"items": { "$ref": "#/$defs/MigsViHumanSkin" }, "type": "array" }, "migs_vi_human_vaginal_data": { "description": "Data that complies with MigsVi combined with HumanVaginal", "items": { "$ref": "#/$defs/MigsViHumanVaginal" }, "type": "array" }, "migs_vi_hydrocarbon_resources_cores_data": { "description": "Data that complies with MigsVi combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MigsViHydrocarbonResourcesCores" }, "type": "array" }, "migs_vi_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with MigsVi combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MigsViHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "migs_vi_microbial_mat_biofilm_data": { "description": "Data that complies with MigsVi combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MigsViMicrobialMatBiofilm" }, "type": "array" }, "migs_vi_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with MigsVi combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MigsViMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "migs_vi_plant_associated_data": { "description": "Data that complies with MigsVi combined with PlantAssociated", "items": { "$ref": "#/$defs/MigsViPlantAssociated" }, "type": "array" }, "migs_vi_sediment_data": { "description": "Data that complies with MigsVi combined with Sediment", "items": { "$ref": "#/$defs/MigsViSediment" }, "type": "array" }, "migs_vi_soil_data": { "description": "Data that complies with MigsVi combined with Soil", "items": { "$ref": "#/$defs/MigsViSoil" }, "type": "array" }, "migs_vi_symbiont_associated_data": { "description": "Data that complies with MigsVi combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MigsViSymbiontAssociated" }, "type": "array" }, "migs_vi_wastewater_sludge_data": { "description": "Data that complies with MigsVi combined with WastewaterSludge", "items": 
{ "$ref": "#/$defs/MigsViWastewaterSludge" }, "type": "array" }, "migs_vi_water_data": { "description": "Data that complies with MigsVi combined with Water", "items": { "$ref": "#/$defs/MigsViWater" }, "type": "array" }, "mimag_agriculture_data": { "description": "Data that complies with Mimag combined with Agriculture", "items": { "$ref": "#/$defs/MimagAgriculture" }, "type": "array" }, "mimag_air_data": { "description": "Data that complies with Mimag combined with Air", "items": { "$ref": "#/$defs/MimagAir" }, "type": "array" }, "mimag_built_environment_data": { "description": "Data that complies with Mimag combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MimagBuiltEnvironment" }, "type": "array" }, "mimag_data": { "description": "Data that complies with checklist Mimag", "items": { "$ref": "#/$defs/Mimag" }, "type": "array" }, "mimag_food_animal_and_animal_feed_data": { "description": "Data that complies with Mimag combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MimagFoodAnimalAndAnimalFeed" }, "type": "array" }, "mimag_food_farm_environment_data": { "description": "Data that complies with Mimag combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MimagFoodFarmEnvironment" }, "type": "array" }, "mimag_food_food_production_facility_data": { "description": "Data that complies with Mimag combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MimagFoodFoodProductionFacility" }, "type": "array" }, "mimag_food_human_foods_data": { "description": "Data that complies with Mimag combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MimagFoodHumanFoods" }, "type": "array" }, "mimag_host_associated_data": { "description": "Data that complies with Mimag combined with HostAssociated", "items": { "$ref": "#/$defs/MimagHostAssociated" }, "type": "array" }, "mimag_human_associated_data": { "description": "Data that complies with Mimag combined with HumanAssociated", "items": { "$ref": "#/$defs/MimagHumanAssociated" 
}, "type": "array" }, "mimag_human_gut_data": { "description": "Data that complies with Mimag combined with HumanGut", "items": { "$ref": "#/$defs/MimagHumanGut" }, "type": "array" }, "mimag_human_oral_data": { "description": "Data that complies with Mimag combined with HumanOral", "items": { "$ref": "#/$defs/MimagHumanOral" }, "type": "array" }, "mimag_human_skin_data": { "description": "Data that complies with Mimag combined with HumanSkin", "items": { "$ref": "#/$defs/MimagHumanSkin" }, "type": "array" }, "mimag_human_vaginal_data": { "description": "Data that complies with Mimag combined with HumanVaginal", "items": { "$ref": "#/$defs/MimagHumanVaginal" }, "type": "array" }, "mimag_hydrocarbon_resources_cores_data": { "description": "Data that complies with Mimag combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MimagHydrocarbonResourcesCores" }, "type": "array" }, "mimag_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with Mimag combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MimagHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "mimag_microbial_mat_biofilm_data": { "description": "Data that complies with Mimag combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MimagMicrobialMatBiofilm" }, "type": "array" }, "mimag_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with Mimag combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MimagMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "mimag_plant_associated_data": { "description": "Data that complies with Mimag combined with PlantAssociated", "items": { "$ref": "#/$defs/MimagPlantAssociated" }, "type": "array" }, "mimag_sediment_data": { "description": "Data that complies with Mimag combined with Sediment", "items": { "$ref": "#/$defs/MimagSediment" }, "type": "array" }, "mimag_soil_data": { "description": "Data that 
complies with Mimag combined with Soil", "items": { "$ref": "#/$defs/MimagSoil" }, "type": "array" }, "mimag_symbiont_associated_data": { "description": "Data that complies with Mimag combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MimagSymbiontAssociated" }, "type": "array" }, "mimag_wastewater_sludge_data": { "description": "Data that complies with Mimag combined with WastewaterSludge", "items": { "$ref": "#/$defs/MimagWastewaterSludge" }, "type": "array" }, "mimag_water_data": { "description": "Data that complies with Mimag combined with Water", "items": { "$ref": "#/$defs/MimagWater" }, "type": "array" }, "mimarks_c_agriculture_data": { "description": "Data that complies with MimarksC combined with Agriculture", "items": { "$ref": "#/$defs/MimarksCAgriculture" }, "type": "array" }, "mimarks_c_air_data": { "description": "Data that complies with MimarksC combined with Air", "items": { "$ref": "#/$defs/MimarksCAir" }, "type": "array" }, "mimarks_c_built_environment_data": { "description": "Data that complies with MimarksC combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MimarksCBuiltEnvironment" }, "type": "array" }, "mimarks_c_data": { "description": "Data that complies with checklist MimarksC", "items": { "$ref": "#/$defs/MimarksC" }, "type": "array" }, "mimarks_c_food_animal_and_animal_feed_data": { "description": "Data that complies with MimarksC combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MimarksCFoodAnimalAndAnimalFeed" }, "type": "array" }, "mimarks_c_food_farm_environment_data": { "description": "Data that complies with MimarksC combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MimarksCFoodFarmEnvironment" }, "type": "array" }, "mimarks_c_food_food_production_facility_data": { "description": "Data that complies with MimarksC combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MimarksCFoodFoodProductionFacility" }, "type": "array" }, "mimarks_c_food_human_foods_data": { 
"description": "Data that complies with MimarksC combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MimarksCFoodHumanFoods" }, "type": "array" }, "mimarks_c_host_associated_data": { "description": "Data that complies with MimarksC combined with HostAssociated", "items": { "$ref": "#/$defs/MimarksCHostAssociated" }, "type": "array" }, "mimarks_c_human_associated_data": { "description": "Data that complies with MimarksC combined with HumanAssociated", "items": { "$ref": "#/$defs/MimarksCHumanAssociated" }, "type": "array" }, "mimarks_c_human_gut_data": { "description": "Data that complies with MimarksC combined with HumanGut", "items": { "$ref": "#/$defs/MimarksCHumanGut" }, "type": "array" }, "mimarks_c_human_oral_data": { "description": "Data that complies with MimarksC combined with HumanOral", "items": { "$ref": "#/$defs/MimarksCHumanOral" }, "type": "array" }, "mimarks_c_human_skin_data": { "description": "Data that complies with MimarksC combined with HumanSkin", "items": { "$ref": "#/$defs/MimarksCHumanSkin" }, "type": "array" }, "mimarks_c_human_vaginal_data": { "description": "Data that complies with MimarksC combined with HumanVaginal", "items": { "$ref": "#/$defs/MimarksCHumanVaginal" }, "type": "array" }, "mimarks_c_hydrocarbon_resources_cores_data": { "description": "Data that complies with MimarksC combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MimarksCHydrocarbonResourcesCores" }, "type": "array" }, "mimarks_c_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with MimarksC combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MimarksCHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "mimarks_c_microbial_mat_biofilm_data": { "description": "Data that complies with MimarksC combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MimarksCMicrobialMatBiofilm" }, "type": "array" }, "mimarks_c_miscellaneous_natural_or_artificial_environment_data": { "description": 
"Data that complies with MimarksC combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MimarksCMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "mimarks_c_plant_associated_data": { "description": "Data that complies with MimarksC combined with PlantAssociated", "items": { "$ref": "#/$defs/MimarksCPlantAssociated" }, "type": "array" }, "mimarks_c_sediment_data": { "description": "Data that complies with MimarksC combined with Sediment", "items": { "$ref": "#/$defs/MimarksCSediment" }, "type": "array" }, "mimarks_c_soil_data": { "description": "Data that complies with MimarksC combined with Soil", "items": { "$ref": "#/$defs/MimarksCSoil" }, "type": "array" }, "mimarks_c_symbiont_associated_data": { "description": "Data that complies with MimarksC combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MimarksCSymbiontAssociated" }, "type": "array" }, "mimarks_c_wastewater_sludge_data": { "description": "Data that complies with MimarksC combined with WastewaterSludge", "items": { "$ref": "#/$defs/MimarksCWastewaterSludge" }, "type": "array" }, "mimarks_c_water_data": { "description": "Data that complies with MimarksC combined with Water", "items": { "$ref": "#/$defs/MimarksCWater" }, "type": "array" }, "mimarks_s_agriculture_data": { "description": "Data that complies with MimarksS combined with Agriculture", "items": { "$ref": "#/$defs/MimarksSAgriculture" }, "type": "array" }, "mimarks_s_air_data": { "description": "Data that complies with MimarksS combined with Air", "items": { "$ref": "#/$defs/MimarksSAir" }, "type": "array" }, "mimarks_s_built_environment_data": { "description": "Data that complies with MimarksS combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MimarksSBuiltEnvironment" }, "type": "array" }, "mimarks_s_data": { "description": "Data that complies with checklist MimarksS", "items": { "$ref": "#/$defs/MimarksS" }, "type": "array" }, "mimarks_s_food_animal_and_animal_feed_data": 
{ "description": "Data that complies with MimarksS combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MimarksSFoodAnimalAndAnimalFeed" }, "type": "array" }, "mimarks_s_food_farm_environment_data": { "description": "Data that complies with MimarksS combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MimarksSFoodFarmEnvironment" }, "type": "array" }, "mimarks_s_food_food_production_facility_data": { "description": "Data that complies with MimarksS combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MimarksSFoodFoodProductionFacility" }, "type": "array" }, "mimarks_s_food_human_foods_data": { "description": "Data that complies with MimarksS combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MimarksSFoodHumanFoods" }, "type": "array" }, "mimarks_s_host_associated_data": { "description": "Data that complies with MimarksS combined with HostAssociated", "items": { "$ref": "#/$defs/MimarksSHostAssociated" }, "type": "array" }, "mimarks_s_human_associated_data": { "description": "Data that complies with MimarksS combined with HumanAssociated", "items": { "$ref": "#/$defs/MimarksSHumanAssociated" }, "type": "array" }, "mimarks_s_human_gut_data": { "description": "Data that complies with MimarksS combined with HumanGut", "items": { "$ref": "#/$defs/MimarksSHumanGut" }, "type": "array" }, "mimarks_s_human_oral_data": { "description": "Data that complies with MimarksS combined with HumanOral", "items": { "$ref": "#/$defs/MimarksSHumanOral" }, "type": "array" }, "mimarks_s_human_skin_data": { "description": "Data that complies with MimarksS combined with HumanSkin", "items": { "$ref": "#/$defs/MimarksSHumanSkin" }, "type": "array" }, "mimarks_s_human_vaginal_data": { "description": "Data that complies with MimarksS combined with HumanVaginal", "items": { "$ref": "#/$defs/MimarksSHumanVaginal" }, "type": "array" }, "mimarks_s_hydrocarbon_resources_cores_data": { "description": "Data that complies with MimarksS combined with 
HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MimarksSHydrocarbonResourcesCores" }, "type": "array" }, "mimarks_s_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with MimarksS combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MimarksSHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "mimarks_s_microbial_mat_biofilm_data": { "description": "Data that complies with MimarksS combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MimarksSMicrobialMatBiofilm" }, "type": "array" }, "mimarks_s_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with MimarksS combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MimarksSMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "mimarks_s_plant_associated_data": { "description": "Data that complies with MimarksS combined with PlantAssociated", "items": { "$ref": "#/$defs/MimarksSPlantAssociated" }, "type": "array" }, "mimarks_s_sediment_data": { "description": "Data that complies with MimarksS combined with Sediment", "items": { "$ref": "#/$defs/MimarksSSediment" }, "type": "array" }, "mimarks_s_soil_data": { "description": "Data that complies with MimarksS combined with Soil", "items": { "$ref": "#/$defs/MimarksSSoil" }, "type": "array" }, "mimarks_s_symbiont_associated_data": { "description": "Data that complies with MimarksS combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MimarksSSymbiontAssociated" }, "type": "array" }, "mimarks_s_wastewater_sludge_data": { "description": "Data that complies with MimarksS combined with WastewaterSludge", "items": { "$ref": "#/$defs/MimarksSWastewaterSludge" }, "type": "array" }, "mimarks_s_water_data": { "description": "Data that complies with MimarksS combined with Water", "items": { "$ref": "#/$defs/MimarksSWater" }, "type": "array" }, "mims_agriculture_data": { "description": "Data that complies with Mims 
combined with Agriculture", "items": { "$ref": "#/$defs/MimsAgriculture" }, "type": "array" }, "mims_air_data": { "description": "Data that complies with Mims combined with Air", "items": { "$ref": "#/$defs/MimsAir" }, "type": "array" }, "mims_built_environment_data": { "description": "Data that complies with Mims combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MimsBuiltEnvironment" }, "type": "array" }, "mims_data": { "description": "Data that complies with checklist Mims", "items": { "$ref": "#/$defs/Mims" }, "type": "array" }, "mims_food_animal_and_animal_feed_data": { "description": "Data that complies with Mims combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MimsFoodAnimalAndAnimalFeed" }, "type": "array" }, "mims_food_farm_environment_data": { "description": "Data that complies with Mims combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MimsFoodFarmEnvironment" }, "type": "array" }, "mims_food_food_production_facility_data": { "description": "Data that complies with Mims combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MimsFoodFoodProductionFacility" }, "type": "array" }, "mims_food_human_foods_data": { "description": "Data that complies with Mims combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MimsFoodHumanFoods" }, "type": "array" }, "mims_host_associated_data": { "description": "Data that complies with Mims combined with HostAssociated", "items": { "$ref": "#/$defs/MimsHostAssociated" }, "type": "array" }, "mims_human_associated_data": { "description": "Data that complies with Mims combined with HumanAssociated", "items": { "$ref": "#/$defs/MimsHumanAssociated" }, "type": "array" }, "mims_human_gut_data": { "description": "Data that complies with Mims combined with HumanGut", "items": { "$ref": "#/$defs/MimsHumanGut" }, "type": "array" }, "mims_human_oral_data": { "description": "Data that complies with Mims combined with HumanOral", "items": { "$ref": "#/$defs/MimsHumanOral" 
}, "type": "array" }, "mims_human_skin_data": { "description": "Data that complies with Mims combined with HumanSkin", "items": { "$ref": "#/$defs/MimsHumanSkin" }, "type": "array" }, "mims_human_vaginal_data": { "description": "Data that complies with Mims combined with HumanVaginal", "items": { "$ref": "#/$defs/MimsHumanVaginal" }, "type": "array" }, "mims_hydrocarbon_resources_cores_data": { "description": "Data that complies with Mims combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MimsHydrocarbonResourcesCores" }, "type": "array" }, "mims_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with Mims combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MimsHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "mims_microbial_mat_biofilm_data": { "description": "Data that complies with Mims combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MimsMicrobialMatBiofilm" }, "type": "array" }, "mims_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with Mims combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MimsMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "mims_plant_associated_data": { "description": "Data that complies with Mims combined with PlantAssociated", "items": { "$ref": "#/$defs/MimsPlantAssociated" }, "type": "array" }, "mims_sediment_data": { "description": "Data that complies with Mims combined with Sediment", "items": { "$ref": "#/$defs/MimsSediment" }, "type": "array" }, "mims_soil_data": { "description": "Data that complies with Mims combined with Soil", "items": { "$ref": "#/$defs/MimsSoil" }, "type": "array" }, "mims_symbiont_associated_data": { "description": "Data that complies with Mims combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MimsSymbiontAssociated" }, "type": "array" }, "mims_wastewater_sludge_data": { "description": "Data that complies 
with Mims combined with WastewaterSludge", "items": { "$ref": "#/$defs/MimsWastewaterSludge" }, "type": "array" }, "mims_water_data": { "description": "Data that complies with Mims combined with Water", "items": { "$ref": "#/$defs/MimsWater" }, "type": "array" }, "misag_agriculture_data": { "description": "Data that complies with Misag combined with Agriculture", "items": { "$ref": "#/$defs/MisagAgriculture" }, "type": "array" }, "misag_air_data": { "description": "Data that complies with Misag combined with Air", "items": { "$ref": "#/$defs/MisagAir" }, "type": "array" }, "misag_built_environment_data": { "description": "Data that complies with Misag combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MisagBuiltEnvironment" }, "type": "array" }, "misag_data": { "description": "Data that complies with checklist Misag", "items": { "$ref": "#/$defs/Misag" }, "type": "array" }, "misag_food_animal_and_animal_feed_data": { "description": "Data that complies with Misag combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MisagFoodAnimalAndAnimalFeed" }, "type": "array" }, "misag_food_farm_environment_data": { "description": "Data that complies with Misag combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MisagFoodFarmEnvironment" }, "type": "array" }, "misag_food_food_production_facility_data": { "description": "Data that complies with Misag combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MisagFoodFoodProductionFacility" }, "type": "array" }, "misag_food_human_foods_data": { "description": "Data that complies with Misag combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MisagFoodHumanFoods" }, "type": "array" }, "misag_host_associated_data": { "description": "Data that complies with Misag combined with HostAssociated", "items": { "$ref": "#/$defs/MisagHostAssociated" }, "type": "array" }, "misag_human_associated_data": { "description": "Data that complies with Misag combined with HumanAssociated", 
"items": { "$ref": "#/$defs/MisagHumanAssociated" }, "type": "array" }, "misag_human_gut_data": { "description": "Data that complies with Misag combined with HumanGut", "items": { "$ref": "#/$defs/MisagHumanGut" }, "type": "array" }, "misag_human_oral_data": { "description": "Data that complies with Misag combined with HumanOral", "items": { "$ref": "#/$defs/MisagHumanOral" }, "type": "array" }, "misag_human_skin_data": { "description": "Data that complies with Misag combined with HumanSkin", "items": { "$ref": "#/$defs/MisagHumanSkin" }, "type": "array" }, "misag_human_vaginal_data": { "description": "Data that complies with Misag combined with HumanVaginal", "items": { "$ref": "#/$defs/MisagHumanVaginal" }, "type": "array" }, "misag_hydrocarbon_resources_cores_data": { "description": "Data that complies with Misag combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MisagHydrocarbonResourcesCores" }, "type": "array" }, "misag_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with Misag combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MisagHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "misag_microbial_mat_biofilm_data": { "description": "Data that complies with Misag combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MisagMicrobialMatBiofilm" }, "type": "array" }, "misag_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with Misag combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MisagMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "misag_plant_associated_data": { "description": "Data that complies with Misag combined with PlantAssociated", "items": { "$ref": "#/$defs/MisagPlantAssociated" }, "type": "array" }, "misag_sediment_data": { "description": "Data that complies with Misag combined with Sediment", "items": { "$ref": "#/$defs/MisagSediment" }, "type": "array" }, 
"misag_soil_data": { "description": "Data that complies with Misag combined with Soil", "items": { "$ref": "#/$defs/MisagSoil" }, "type": "array" }, "misag_symbiont_associated_data": { "description": "Data that complies with Misag combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MisagSymbiontAssociated" }, "type": "array" }, "misag_wastewater_sludge_data": { "description": "Data that complies with Misag combined with WastewaterSludge", "items": { "$ref": "#/$defs/MisagWastewaterSludge" }, "type": "array" }, "misag_water_data": { "description": "Data that complies with Misag combined with Water", "items": { "$ref": "#/$defs/MisagWater" }, "type": "array" }, "miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with Extension MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "miuvig_agriculture_data": { "description": "Data that complies with Miuvig combined with Agriculture", "items": { "$ref": "#/$defs/MiuvigAgriculture" }, "type": "array" }, "miuvig_air_data": { "description": "Data that complies with Miuvig combined with Air", "items": { "$ref": "#/$defs/MiuvigAir" }, "type": "array" }, "miuvig_built_environment_data": { "description": "Data that complies with Miuvig combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MiuvigBuiltEnvironment" }, "type": "array" }, "miuvig_data": { "description": "Data that complies with checklist Miuvig", "items": { "$ref": "#/$defs/Miuvig" }, "type": "array" }, "miuvig_food_animal_and_animal_feed_data": { "description": "Data that complies with Miuvig combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MiuvigFoodAnimalAndAnimalFeed" }, "type": "array" }, "miuvig_food_farm_environment_data": { "description": "Data that complies with Miuvig combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MiuvigFoodFarmEnvironment" }, "type": "array" }, 
"miuvig_food_food_production_facility_data": { "description": "Data that complies with Miuvig combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MiuvigFoodFoodProductionFacility" }, "type": "array" }, "miuvig_food_human_foods_data": { "description": "Data that complies with Miuvig combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MiuvigFoodHumanFoods" }, "type": "array" }, "miuvig_host_associated_data": { "description": "Data that complies with Miuvig combined with HostAssociated", "items": { "$ref": "#/$defs/MiuvigHostAssociated" }, "type": "array" }, "miuvig_human_associated_data": { "description": "Data that complies with Miuvig combined with HumanAssociated", "items": { "$ref": "#/$defs/MiuvigHumanAssociated" }, "type": "array" }, "miuvig_human_gut_data": { "description": "Data that complies with Miuvig combined with HumanGut", "items": { "$ref": "#/$defs/MiuvigHumanGut" }, "type": "array" }, "miuvig_human_oral_data": { "description": "Data that complies with Miuvig combined with HumanOral", "items": { "$ref": "#/$defs/MiuvigHumanOral" }, "type": "array" }, "miuvig_human_skin_data": { "description": "Data that complies with Miuvig combined with HumanSkin", "items": { "$ref": "#/$defs/MiuvigHumanSkin" }, "type": "array" }, "miuvig_human_vaginal_data": { "description": "Data that complies with Miuvig combined with HumanVaginal", "items": { "$ref": "#/$defs/MiuvigHumanVaginal" }, "type": "array" }, "miuvig_hydrocarbon_resources_cores_data": { "description": "Data that complies with Miuvig combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MiuvigHydrocarbonResourcesCores" }, "type": "array" }, "miuvig_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with Miuvig combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MiuvigHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "miuvig_microbial_mat_biofilm_data": { "description": "Data that complies with Miuvig combined 
with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MiuvigMicrobialMatBiofilm" }, "type": "array" }, "miuvig_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with Miuvig combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MiuvigMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "miuvig_plant_associated_data": { "description": "Data that complies with Miuvig combined with PlantAssociated", "items": { "$ref": "#/$defs/MiuvigPlantAssociated" }, "type": "array" }, "miuvig_sediment_data": { "description": "Data that complies with Miuvig combined with Sediment", "items": { "$ref": "#/$defs/MiuvigSediment" }, "type": "array" }, "miuvig_soil_data": { "description": "Data that complies with Miuvig combined with Soil", "items": { "$ref": "#/$defs/MiuvigSoil" }, "type": "array" }, "miuvig_symbiont_associated_data": { "description": "Data that complies with Miuvig combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MiuvigSymbiontAssociated" }, "type": "array" }, "miuvig_wastewater_sludge_data": { "description": "Data that complies with Miuvig combined with WastewaterSludge", "items": { "$ref": "#/$defs/MiuvigWastewaterSludge" }, "type": "array" }, "miuvig_water_data": { "description": "Data that complies with Miuvig combined with Water", "items": { "$ref": "#/$defs/MiuvigWater" }, "type": "array" }, "plant_associated_data": { "description": "Data that complies with Extension PlantAssociated", "items": { "$ref": "#/$defs/PlantAssociated" }, "type": "array" }, "sediment_data": { "description": "Data that complies with Extension Sediment", "items": { "$ref": "#/$defs/Sediment" }, "type": "array" }, "soil_data": { "description": "Data that complies with Extension Soil", "items": { "$ref": "#/$defs/Soil" }, "type": "array" }, "symbiont_associated_data": { "description": "Data that complies with Extension SymbiontAssociated", "items": { "$ref": "#/$defs/SymbiontAssociated" }, 
"type": "array" }, "wastewater_sludge_data": { "description": "Data that complies with Extension WastewaterSludge", "items": { "$ref": "#/$defs/WastewaterSludge" }, "type": "array" }, "water_data": { "description": "Data that complies with Extension Water", "items": { "$ref": "#/$defs/Water" }, "type": "array" } }, "title": "MixsCompliantData", "type": "object" }, "NEGCONTTYPEENUM": { "description": "", "enum": [ "DNA-free PCR mix", "distilled water", "empty collection device", "empty collection tube", "phosphate buffer", "sterile swab", "sterile syringe" ], "title": "NEGCONTTYPEENUM", "type": "string" }, "OCCUPDOCUMENTENUM": { "description": "", "enum": [ "automated count", "estimate", "manual count", "videos" ], "title": "OCCUPDOCUMENTENUM", "type": "string" }, "OXYSTATSAMPENUM": { "description": "", "enum": [ "aerobic", "anaerobic", "other" ], "title": "OXYSTATSAMPENUM", "type": "string" }, "PLANTREPRODCROPENUM": { "description": "", "enum": [ "plant cutting", "pregerminated seed", "ratoon", "seed", "seedling", "whole mature plant" ], "title": "PLANTREPRODCROPENUM", "type": "string" }, "PLANTSEXENUM": { "description": "", "enum": [ "Androdioecious", "Androecious", "Androgynomonoecious", "Androgynous", "Andromonoecious", "Bisexual", "Dichogamous", "Diclinous", "Dioecious", "Gynodioecious", "Gynoecious", "Gynomonoecious", "Hermaphroditic", "Imperfect", "Monoclinous", "Monoecious", "Perfect", "Polygamodioecious", "Polygamomonoecious", "Polygamous", "Protandrous", "Protogynous", "Subandroecious", "Subdioecious", "Subgynoecious", "Synoecious", "Trimonoecious", "Trioecious", "Unisexual" ], "title": "PLANTSEXENUM", "type": "string" }, "PREDGENOMESTRUCENUM": { "description": "", "enum": [ "non-segmented", "segmented", "undetermined" ], "title": "PREDGENOMESTRUCENUM", "type": "string" }, "PROFILEPOSITIONENUM": { "description": "", "enum": [ "backslope", "footslope", "shoulder", "summit", "toeslope" ], "title": "PROFILEPOSITIONENUM", "type": "string" }, "PlantAssociated": 
{ "additionalProperties": false, "description": "plant-associated extension", "properties": { "air_temp_regm": { "description": "Information about treatment involving an exposure to varying temperatures; should include the temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include different temperature regimens", "items": { "type": "string" }, "type": "array" }, "ances_data": { "description": "Information about either pedigree or other ancestral information description (e.g. parental variety in case of mutant or selection), e.g. A/3*B (meaning [(A x B) x B] x B)", "type": "string" }, "antibiotic_regm": { "description": "Information about treatment involving antibiotic administration; should include the name of antibiotic, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple antibiotic regimens", "items": { "type": "string" }, "type": "array" }, "biol_stat": { "description": "The level of genome modification", "type": "string" }, "biotic_regm": { "description": "Information about treatment(s) involving use of biotic factors, such as bacteria, viruses or fungi", "items": { "type": "string" }, "type": "array" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. antibiotics, N fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_mutagen": { "description": "Treatment involving use of mutagens; should include the name of mutagen, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mutagen regimens", "items": { "type": "string" }, "type": "array" }, "climate_environment": { "description": "Treatment involving an exposure to a particular climate; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple climates", "items": { "type": "string" }, "type": "array" }, "cult_root_med": { "description": "Name or reference for the hydroponic or in vitro culture rooting medium; can be the name of a commonly used medium or reference to a specific medium, e.g. Murashige and Skoog medium. If the medium has not been formally published, use the rooting medium descriptors", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. 
Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fertilizer_regm": { "description": "Information about treatment involving the use of fertilizers; should include the name of fertilizer, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fertilizer regimens", "items": { "type": "string" }, "type": "array" }, "fungicide_regm": { "description": "Information about treatment involving use of fungicides; should include the name of fungicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple fungicide regimens", "items": { "type": "string" }, "type": "array" }, "gaseous_environment": { "description": "Use of conditions with differing gaseous environments; should include the name of gaseous compound, amount administered, treatment duration, interval and total experimental duration; can include multiple gaseous environment regimens", "items": { "type": "string" }, "type": "array" }, "genetic_mod": { "description": "Genetic modifications of the genome of an organism, which may occur naturally by spontaneous mutation, or be introduced by some experimental means, e.g. 
specification of a transgene or the gene knocked-out or details of transient transfection", "type": "string" }, "gravity": { "description": "Information about treatment involving use of gravity factor to study various types of responses in presence, absence or modified levels of gravity; treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple treatments", "items": { "type": "string" }, "type": "array" }, "growth_facil": { "description": "Type of facility where the sampled plant was grown; controlled vocabulary: growth chamber, open top chamber, glasshouse, experimental garden, field. Alternatively use Crop Ontology (CO) terms, see http://www.cropontology.org/ontology/CO_715/Crop%20Research", "type": "string" }, "growth_habit": { "$ref": "#/$defs/GROWTHHABITENUM", "description": "Characteristic shape, appearance or growth form of a plant species" }, "growth_hormone_regm": { "description": "Information about treatment involving use of growth hormones; should include the name of growth hormone, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple growth hormone regimens", "items": { "type": "string" }, "type": "array" }, "herbicide_regm": { "description": "Information about treatment involving use of herbicides; should include the name of herbicide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g.
Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_disease_stat": { "description": "List of diseases with which the host has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "type": "string" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_subspecf_genlin": { "description": "Information about the genetic distinctness of the host organism below the subspecies level e.g., serovar, serotype, biotype, ecotype, variety, cultivar, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. 
Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123", "items": { "type": "string" }, "type": "array" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_wet_mass": { "description": "Measurement of wet mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "humidity_regm": { "description": "Information about treatment involving an exposure to varying degrees of humidity; should include amount of humidity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "light_regm": { "description": "Information about treatment(s) involving exposure to light, including both light intensity and quality", "type": "string" }, "mechanical_damage": { "description": "Information about any mechanical damage exerted on the plant; can include multiple damages and sites", "items": { "type": "string" }, "type": "array" }, "mineral_nutr_regm": { "description": "Information about treatment involving the use of mineral supplements; should include the name of mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "non_min_nutr_regm": { "description": "Information about treatment involving the exposure of plant to non-mineral nutrient such as oxygen, hydrogen or carbon; should include the name of non-mineral nutrient, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple non-mineral nutrient regimens", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell
count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "pesticide_regm": { "description": "Information about treatment involving use of insecticides; should include the name of pesticide, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple pesticide regimens", "items": { "type": "string" }, "type": "array" }, "ph_regm": { "description": "Information about treatment involving exposure of plants to varying levels of pH of the growth media, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "plant_growth_med": { "description": "Specification of the media for growing the plants or tissue cultured samples, e.g. soil, aeroponic, hydroponic, in vitro solid culture medium, in vitro liquid culture medium.
Recommended value is a specific value from EO:plant growth medium (follow this link for terms http://purl.obolibrary.org/obo/EO_0007147) or other controlled vocabulary", "type": "string" }, "plant_product": { "description": "Substance produced by the plant, where the sample was obtained from", "type": "string" }, "plant_sex": { "$ref": "#/$defs/PLANTSEXENUM", "description": "Sex of the reproductive parts on the whole plant, e.g. pistillate, staminate, monoecious, hermaphrodite" }, "plant_struc": { "description": "Name of plant structure the sample was obtained from; for Plant Ontology (PO) (v releases/2017-12-14) terms, see http://purl.bioontology.org/ontology/PO, e.g. petiole epidermis (PO_0000051). If an individual flower is sampled, the sex of it can be recorded here", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+) \\[[a-zA-Z]{2,}:[a-zA-Z0-9]\\d+\\]$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "radiation_regm": { "description": "Information about treatment involving exposure of plant or a plant part to a particular radiation regimen; should include the radiation type, amount or intensity administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple radiation regimens", "items": { "type": "string" }, "type": "array" }, "rainfall_regm": { "description": "Information about treatment involving an exposure to a given amount of rainfall, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "root_cond": { "description": "Relevant rooting conditions such as field plot size, sowing density, container dimensions, number of plants per container", "pattern":
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "root_med_carbon": { "description": "Source of organic carbon in the culture rooting medium; e.g. sucrose", "type": "string" }, "root_med_macronutr": { "description": "Measurement of the culture rooting medium macronutrients (N, P, K, Ca, Mg, S); e.g. KH2PO4 (170 mg/L)", "type": "string" }, "root_med_micronutr": { "description": "Measurement of the culture rooting medium micronutrients (Fe, Mn, Zn, B, Cu, Mo); e.g. H3BO3 (6.2 mg/L)", "type": "string" }, "root_med_ph": { "description": "pH measurement of the culture rooting medium; e.g. 5.5", "type": "number" }, "root_med_regl": { "description": "Growth regulators in the culture rooting medium such as cytokinins, auxins, gibberellins, abscisic acid; e.g. 0.5 mg/L NAA", "type": "string" }, "root_med_solid": { "description": "Specification of the solidifying agent in the culture rooting medium; e.g. agar", "type": "string" }, "root_med_suppl": { "description": "Organic supplements of the culture rooting medium, such as vitamins, amino acids, organic acids, antibiotics, activated charcoal; e.g. nicotinic acid (0.5 mg/L)", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)?
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salt_regm": { "description": "Information about treatment involving use of salts as supplement to liquid and soil growth media; should include the name of salt, amount administered, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple salt regimens", "items": { "type": "string" }, "type": "array" }, "samp_capt_status": { "$ref": "#/$defs/SAMPCAPTSTATUSENUM", "description": "Reason for the sample" }, "samp_dis_stage": { "$ref": "#/$defs/SAMPDISSTAGEENUM", "description": "Stage of the disease at the time of sample collection, e.g. inoculation, penetration, infection, growth and reproduction, dissemination of pathogen" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g.
-80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_environment": { "description": "Treatment involving an exposure to a particular season (e.g. Winter, summer, rabi, rainy etc.), treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment", "items": { "type": "string" }, "type": "array" }, "standing_water_regm": { "description": "Treatment involving an exposure to standing water during a plant's life span, types can be flood water or standing water, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tiss_cult_growth_med": { "description": "Description of plant tissue culture growth media used", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_temp_regm": { "description": "Information about treatment involving an exposure to water with varying degree of temperature, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" }, "watering_regm": { "description": "Information about treatment involving an exposure to watering frequencies, treatment regimen including how many times the treatment was repeated, how long each treatment lasted, and the start and end time of the entire treatment; can include multiple regimens", "items": { "type": "string" }, "type": "array" } }, "required": [ "samp_name", "project_name" ], "title": "PlantAssociated", "type": "object" }, "QUADPOSENUM": { "description": "", "enum": [ "East side", "North side", "South side", "West side" ], "title": "QUADPOSENUM", "type": "string" }, "RELSAMPLOCENUM": { "description": "", "enum": [ "center of car", "edge of car", "under a seat" ], "title": "RELSAMPLOCENUM", "type": "string" }, "RELTOOXYGENENUM": { "description": "", "enum": [ "aerobe", "anaerobe", "facultative", "microaerophilic", "microanaerobe", "obligate aerobe", "obligate anaerobe" ], "title": "RELTOOXYGENENUM", "type": "string" }, "ROOMCONDTENUM": { "description": "", "enum": [ "damaged", "needs repair", "new", "rupture", "visible signs of mold/mildew", "visible wear" ], "title": "ROOMCONDTENUM", "type": "string" }, "ROOMCONNECTEDENUM": { "description": "", "enum": [ "attic", "bathroom", "closet", "conference room", "elevator", 
"examining room", "hallway", "kitchen", "mail room", "office", "stairwell" ], "title": "ROOMCONNECTEDENUM", "type": "string" }, "ROOMLOCENUM": { "description": "", "enum": [ "corner room", "exterior wall", "interior room" ], "title": "ROOMLOCENUM", "type": "string" }, "ROOMSAMPPOSENUM": { "description": "", "enum": [ "center", "east corner", "north corner", "northeast corner", "northwest corner", "south corner", "southeast corner", "southwest corner", "west corner" ], "title": "ROOMSAMPPOSENUM", "type": "string" }, "ROUTETRANSMISSIONENUM": { "description": "", "enum": [ "environmental:faecal-oral", "transplacental", "vector-borne:vector penetration" ], "title": "ROUTETRANSMISSIONENUM", "type": "string" }, "SAMPCAPTSTATUSENUM": { "description": "", "enum": [ "active surveillance in response to an outbreak", "active surveillance not initiated by an outbreak", "farm sample", "market sample", "other" ], "title": "SAMPCAPTSTATUSENUM", "type": "string" }, "SAMPCOLLECTPOINTENUM": { "description": "", "enum": [ "drilling rig", "other", "separator", "storage tank", "test well", "well", "wellhead" ], "title": "SAMPCOLLECTPOINTENUM", "type": "string" }, "SAMPDISSTAGEENUM": { "description": "", "enum": [ "dissemination", "growth and reproduction", "infection", "inoculation", "other", "penetration" ], "title": "SAMPDISSTAGEENUM", "type": "string" }, "SAMPLOCCONDITIONENUM": { "description": "", "enum": [ "damaged", "new", "rupture", "visible signs of mold-mildew", "visible weariness repair" ], "title": "SAMPLOCCONDITIONENUM", "type": "string" }, "SAMPSUBTYPEENUM": { "description": "", "enum": [ "biofilm", "not applicable", "oil phase", "other", "water phase" ], "title": "SAMPSUBTYPEENUM", "type": "string" }, "SAMPSURFMOISTUREENUM": { "description": "", "enum": [ "intermittent moisture", "not present", "submerged" ], "title": "SAMPSURFMOISTUREENUM", "type": "string" }, "SAMPTRANSPORTCONTENUM": { "description": "", "enum": [ "bottle", "cooler", "glass vial", "plastic vial", 
"vendor supplied container" ], "title": "SAMPTRANSPORTCONTENUM", "type": "string" }, "SAMPWEATHERENUM": { "description": "", "enum": [ "clear sky", "cloudy", "foggy", "hail", "rain", "sleet", "snow", "sunny", "windy" ], "title": "SAMPWEATHERENUM", "type": "string" }, "SCLYSISAPPROACHENUM": { "description": "", "enum": [ "chemical", "combination", "enzymatic", "physical" ], "title": "SCLYSISAPPROACHENUM", "type": "string" }, "SEASONUSEENUM": { "description": "", "enum": [ "Fall", "Spring", "Summer", "Winter" ], "title": "SEASONUSEENUM", "type": "string" }, "SEDIMENTTYPEENUM": { "description": "", "enum": [ "biogenous", "cosmogenous", "hydrogenous", "lithogenous" ], "title": "SEDIMENTTYPEENUM", "type": "string" }, "SEQQUALITYCHECKENUM": { "description": "", "enum": [ "manually edited", "none" ], "title": "SEQQUALITYCHECKENUM", "type": "string" }, "SHADINGDEVICELOCENUM": { "description": "", "enum": [ "exterior", "interior" ], "title": "SHADINGDEVICELOCENUM", "type": "string" }, "SHADINGDEVICETYPEENUM": { "description": "", "enum": [ "bahama shutters", "exterior roll blind", "gambrel awning", "hood awning", "porchroller awning", "sarasota shutters", "slatted aluminum", "solid aluminum awning", "sun screen", "tree", "trellis", "venetian awning" ], "title": "SHADINGDEVICETYPEENUM", "type": "string" }, "SHAREDENUM0": { "description": "", "enum": [ "east", "north", "northeast", "northwest", "south", "southeast", "southwest", "west" ], "title": "SHAREDENUM0", "type": "string" }, "SHAREDENUM1": { "description": "", "enum": [ "no presence of mold visible", "presence of mold visible" ], "title": "SHAREDENUM1", "type": "string" }, "SHAREDENUM2": { "description": "", "enum": [ "damaged", "needs repair", "new", "rupture", "visible wear" ], "title": "SHAREDENUM2", "type": "string" }, "SHAREDENUM3": { "description": "", "enum": [ "damaged", "needs repair", "new", "rupture", "visible wear" ], "title": "SHAREDENUM3", "type": "string" }, "SHAREDENUM4": { "description": "", "enum": [ 
"Santa-Fe texture", "crows feet", "crows-foot stomp", "double skip", "hawk and trowel", "knockdown", "orange peel", "popcorn", "rosebud stomp", "skip trowel", "smooth", "stomp knockdown", "swirl" ], "title": "SHAREDENUM4", "type": "string" }, "SHAREDENUM5": { "description": "", "enum": [ "Archean", "Cambrian", "Carboniferous", "Cenozoic", "Cretaceous", "Devonian", "Jurassic", "Mesozoic", "Neogene", "Ordovician", "Paleogene", "Paleozoic", "Permian", "Precambrian", "Proterozoic", "Silurian", "Triassic", "other" ], "title": "SHAREDENUM5", "type": "string" }, "SOILHORIZONENUM": { "description": "", "enum": [ "A horizon", "B horizon", "C horizon", "E horizon", "O horizon", "Permafrost", "R layer" ], "title": "SOILHORIZONENUM", "type": "string" }, "SOILTEXTURECLASSENUM": { "description": "", "enum": [ "clay", "clay loam", "loam", "loamy sand", "sand", "sandy clay", "sandy clay loam", "sandy loam", "silt", "silt loam", "silty clay", "silty clay loam" ], "title": "SOILTEXTURECLASSENUM", "type": "string" }, "SORTTECHENUM": { "description": "", "enum": [ "flow cytometric cell sorting", "lazer-tweezing", "microfluidics", "micromanipulation", "optical manipulation", "other" ], "title": "SORTTECHENUM", "type": "string" }, "SPACETYPSTATEENUM": { "description": "", "enum": [ "typically occupied", "typically unoccupied" ], "title": "SPACETYPSTATEENUM", "type": "string" }, "SPECIFICENUM": { "description": "", "enum": [ "as built", "bid", "construction", "design", "operation", "photos" ], "title": "SPECIFICENUM", "type": "string" }, "SRDEPENVENUM": { "description": "", "enum": [ "Fluvioldeltaic", "Fluviomarine", "Lacustine", "Marine", "other" ], "title": "SRDEPENVENUM", "type": "string" }, "SRKEROGTYPEENUM": { "description": "", "enum": [ "Type I", "Type II", "Type III", "Type IV", "other" ], "title": "SRKEROGTYPEENUM", "type": "string" }, "SRLITHOLOGYENUM": { "description": "", "enum": [ "Biosilicieous", "Carbonate", "Clastic", "Coal", "other" ], "title": "SRLITHOLOGYENUM", "type": 
"string" }, "SUBSTRUCTURETYPEENUM": { "description": "", "enum": [ "basement", "crawlspace", "slab on grade" ], "title": "SUBSTRUCTURETYPEENUM", "type": "string" }, "SURFAIRCONTENUM": { "description": "", "enum": [ "biocides", "biological contaminants", "dust", "nutrients", "organic matter", "particulate matter", "radon", "volatile organic compounds" ], "title": "SURFAIRCONTENUM", "type": "string" }, "SURFMATERIALENUM": { "description": "", "enum": [ "adobe", "carpet", "cinder blocks", "concrete", "glass", "hay bales", "metal", "paint", "plastic", "stainless steel", "stone", "stucco", "tile", "vinyl", "wood" ], "title": "SURFMATERIALENUM", "type": "string" }, "SYMBIONTHOSTROLEENUM": { "description": "", "enum": [ "accidental", "dead-end", "definitive", "intermediate", "paratenic", "reservoir", "single host" ], "title": "SYMBIONTHOSTROLEENUM", "type": "string" }, "SYMLIFECYCLETYPEENUM": { "description": "", "enum": [ "complex life cycle", "simple life cycle" ], "title": "SYMLIFECYCLETYPEENUM", "type": "string" }, "Sediment": { "additionalProperties": false, "description": "sediment extension", "properties": { "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as; total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "methane": { "description": "Methane (gas) amount or concentration at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "particle_class": { "description": "Particles are classified, based on their size, into six general categories: clay, silt, sand, gravel, cobbles, and boulders; should include amount of particle preceded by the name of the particle type; can include multiple values", "items": { "type": "string" }, "type": "array" }, "perturbation": { "description": "Type of perturbation, e.g. 
chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "porosity": { "description": "Porosity of deposited sediment is volume of voids divided by the total volume of sample", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. 
Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sediment_type": { "$ref": "#/$defs/SEDIMENTTYPEENUM", "description": "Information about the sediment type based on major constituents" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_carb": { "description": "Total carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name", "depth" ], "title": "Sediment", "type": "object" }, "Soil": { "additionalProperties": false, "description": "soil extension", "properties": { "agrochem_addition": { "description": "Addition of fertilizers, pesticides, etc. - amount and time of applications", "items": { "type": "string" }, "type": "array" }, "al_sat": { "description": "Aluminum saturation (esp. for tropical soils)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "al_sat_meth": { "description": "Reference or method used in determining Al saturation", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "annual_precpt": { "description": "The average of all annual precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "annual_temp": { "description": "Mean annual temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "crop_rotation": { "description": "Whether or not crop is rotated, and if yes, rotation schedule", "type": "string" }, "cur_land_use": { "description": "Present state of sample site", "type": "string" }, "cur_vegetation": { "description": "Vegetation classification from one or more standard classification systems, or agricultural crop", "type": "string" }, "cur_vegetation_meth": { "description": "Reference or method used in vegetation classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "drainage_class": { "$ref": "#/$defs/DRAINAGECLASSENUM", "description": "Drainage classification from a standard system such as the USDA system" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "extreme_event": { "description": "Unusual physical events that may have affected microbial populations", "format": "date-time", "type": "string" }, "fao_class": { "$ref": "#/$defs/FAOCLASSENUM", "description": "Soil classification from the FAO World Reference Database for Soil Resources. The list can be found at http://www.fao.org/nr/land/sols/soil/wrb-soil-maps/reference-groups" }, "fire": { "description": "Historical and/or physical evidence of fire", "format": "date-time", "type": "string" }, "flooding": { "description": "Historical and/or physical evidence of flooding", "format": "date-time", "type": "string" }, "heavy_metals": { "description": "Heavy metals present in the sequenced sample and their concentrations. For multiple heavy metals and concentrations, add multiple copies of this field", "items": { "type": "string" }, "type": "array" }, "heavy_metals_meth": { "description": "Reference or method used in determining heavy metals", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "horizon_meth": { "description": "Reference or method used in determining the horizon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "link_addit_analys": { "description": "Link to additional analysis results performed on the sample", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "link_class_info": { "description": "Link to digitized soil maps or other soil classification information", "type": "string" }, "link_climate_info": { "description": "Link to climate resource", "pattern": 
"^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "local_class": { "description": "Soil classification based on local soil classification system", "type": "string" }, "local_class_meth": { "description": "Reference or method used in determining the local soil classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "micro_biomass_meth": { "description": "Reference or method used in determining microbial biomass", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "microbial_biomass": { "description": "The part of the organic matter in the soil that constitutes living microorganisms smaller than 5-10 micrometer. If you keep this, you would need to have correction factors used for conversion to the final units", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "ph_meth": { "description": "Reference or method used in determining pH", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "pool_dna_extracts": { "description": "Indicate whether multiple DNA extractions were mixed. If the answer is yes, the number of extracts that were pooled should be given", "type": "string" }, "prev_land_use_meth": { "description": "Reference or method used in determining previous land use and dates", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "previous_land_use": { "description": "Previous land use and dates", "type": "string" }, "profile_position": { "$ref": "#/$defs/PROFILEPOSITIONENUM", "description": "Cross-sectional position in the hillslope where sample was collected. Sample area position in relation to surrounding areas" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. 
Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_precpt": { "description": "The average of all seasonal precipitation values known, or an estimated equivalent value derived by such methods as regional indexes or Isohyetal maps", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "season_temp": { "description": "Mean seasonal temperature", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sieving": { "description": "Collection design of pooled samples and/or sieve size and amount of sample sieved", "type": "string" }, "slope_aspect": { "description": "The direction a slope faces. While looking down a slope use a compass to record the direction you are facing (direction or degrees); e.g., nw or 315 degrees. This measure provides an indication of sun and wind exposure that will influence soil temperature and evapotranspiration", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "slope_gradient": { "description": "Commonly called 'slope'. The angle between ground surface and a horizontal line (in percent). This is the direction that overland water would flow. 
This measure is usually taken with a hand level meter or clinometer", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_horizon": { "$ref": "#/$defs/SOILHORIZONENUM", "description": "Specific layer in the land area which measures parallel to the soil surface and possesses physical characteristics which differ from the layers above and beneath" }, "soil_texture": { "description": "The relative proportion of different grain sizes of mineral particles in a soil, as described using a standard system; express as % sand (50 um to 2 mm), silt (2 um to 50 um), and clay (<2 um) with textural name (e.g., silty clay loam) optional", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soil_texture_meth": { "description": "Reference or method used in determining soil texture", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "soil_type": { "description": "Description of the soil type or classification. This field accepts terms under soil (http://purl.obolibrary.org/obo/ENVO_00001998). 
Multiple terms can be separated by pipes", "type": "string" }, "soil_type_meth": { "description": "Reference or method used in determining soil series name or other lower-level classification", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "store_cond": { "description": "Explain how and for how long the soil sample was stored before DNA extraction (fresh/frozen/other)", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tillage": { "description": "Note method(s) used for tilling", "items": { "$ref": "#/$defs/TILLAGEENUM" }, "type": "array" }, "tot_nitro_cont_meth": { "description": "Reference or method used in determining the total nitrogen", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_nitro_content": { "description": "Total nitrogen content of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_org_c_meth": { "description": "Reference or method used in determining total organic carbon", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "tot_org_carb": { "description": "Total organic carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_cont_soil_meth": { "description": "Reference or method used in determining the water content of soil", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$$", "type": "string" }, "water_content": { "description": "Water content measurement", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name", "depth", "elev" ], "title": "Soil", "type": "object" }, "SymbiontAssociated": { "additionalProperties": false, "description": "symbiont-associated extension", "properties": { "alt": { "description": "Heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "association_duration": { "description": "Time spent in host of the symbiotic organism at the time of sampling; relevant scale depends on symbiotic organism and study", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. 
For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "collection_date": { "description": "The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant", "format": "date-time", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "geo_loc_name": { "description": "The geographical origin of the sample as defined by the country or sea name followed by specific region name. 
Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ)", "pattern": "^([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+): ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+), ([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "gravidity": { "description": "Whether or not subject is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_age": { "description": "Age of host at the time of sampling; relevant scale depends on species and study, e.g. Could be seconds for amoebae or centuries for trees", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_body_habitat": { "description": "Original body habitat where the sample was obtained from", "type": "string" }, "host_body_product": { "description": "Substance produced by the body, e.g. Stool, mucus, where the sample was obtained from. Use terms from the foundational model of anatomy ontology (fma) or Uber-anatomy ontology (UBERON)", "type": "string" }, "host_body_site": { "description": "Name of body site where the sample was obtained from, such as a specific organ or tissue (tongue, lung etc...). Use terms from the foundational model of anatomy ontology (fma) or the Uber-anatomy ontology (UBERON)", "type": "string" }, "host_cellular_loc": { "$ref": "#/$defs/HOSTCELLULARLOCENUM", "description": "The localization of the symbiotic host organism within the host from which it was sampled: e.g. 
intracellular if the symbiotic host organism is localized within the cells or extracellular if the symbiotic host organism is localized outside of cells" }, "host_color": { "description": "The color of host", "type": "string" }, "host_common_name": { "description": "Common name of the host", "type": "string" }, "host_dependence": { "$ref": "#/$defs/HOSTDEPENDENCEENUM", "description": "Type of host dependence for the symbiotic host organism to its host" }, "host_dry_mass": { "description": "Measurement of dry mass", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_fam_rel": { "description": "Relationships to other hosts in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_genotype": { "description": "Observed genotype", "type": "string" }, "host_growth_cond": { "description": "Literature reference giving growth conditions of the host", "pattern": "^^PMID:\\d+$|^doi:10.\\d{2,9}/.*$|^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$|([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_height": { "description": "The height of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_infra_spec_name": { "description": "Taxonomic information about the host below subspecies level", "type": "string" }, "host_infra_spec_rank": { "description": "Taxonomic rank information about the host below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_length": { "description": "The length of subject", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_life_stage": { "description": "Description of life stage of host", "type": "string" }, "host_number": { "description": "Number of symbiotic host individuals pooled at the time of collection", "type": "string" }, "host_of_host_coinf": { "description": "The taxonomic name of any coinfecting organism observed in a symbiotic relationship with the host of the sampled host organism. e.g. where a sample collected from a host trematode species (A) which was collected from a host_of_host fish (B) that was also infected with a nematode (C), the value here would be (C) the nematode {species name} or {common name}. Multiple co-infecting species may be added in a comma-separated list. For listing symbiotic organisms associated with the host (A) use the term Observed host symbiont", "type": "string" }, "host_of_host_disease": { "description": "List of diseases with which the host of the symbiotic host organism has been diagnosed; can include multiple diagnoses. The value of the field depends on host; for humans the terms should be chosen from the DO (Human Disease Ontology) at https://www.disease-ontology.org, non-human host diseases are free text", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_loc": { "description": "For a symbiotic host organism the local anatomical environment within its host may have causal influences. Report the anatomical entity(s) which are in the direct environment of the symbiotic host organism being sampled and which you believe have significant causal influences on your sample or specimen. 
For example, if the symbiotic host organism being sampled is an intestinal worm, its local environmental context will be the term for intestine from UBERON (http://uberon.github.io/)", "items": { "type": "string" }, "type": "array" }, "host_of_host_env_med": { "description": "Report the environmental material(s) immediately surrounding the symbiotic host organism at the time of sampling. This usually will be a tissue or substance type from the host, but may be another material if the symbiont is external to the host. We recommend using classes from the UBERON ontology, but subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483) may also be used. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
intestines, heart)", "type": "string" }, "host_of_host_fam_rel": { "description": "Familial relationship of the host of the symbiotic host organisms to other hosts of symbiotic host organism in the same study; can include multiple relationships", "items": { "type": "string" }, "type": "array" }, "host_of_host_geno": { "description": "Observed genotype of the host of the symbiotic host organism", "type": "string" }, "host_of_host_gravid": { "description": "Whether or not the host of the symbiotic host organism is gravid, and if yes date due or date post-conception, specifying which is used", "type": "string" }, "host_of_host_infname": { "description": "Taxonomic name information of the host of the symbiotic host organism below subspecies level", "type": "string" }, "host_of_host_infrank": { "description": "Taxonomic rank information about the host of the symbiotic host organism below subspecies level, such as variety, form, rank etc", "type": "string" }, "host_of_host_name": { "description": "Common name of the host of the symbiotic host organism", "type": "string" }, "host_of_host_pheno": { "description": "Phenotype of the host of the symbiotic host organism. For phenotypic quality ontology (PATO) terms, see http://purl.bioontology.org/ontology/pato", "type": "string" }, "host_of_host_sub_id": { "description": "A unique identifier by which each host of the symbiotic host organism subject can be referred to, de-identified, e.g. #H14", "type": "string" }, "host_of_host_taxid": { "description": "NCBI taxon id of the host of the symbiotic host organism", "type": "string" }, "host_of_host_totmass": { "description": "Total mass of the host of the symbiotic host organism at collection, the unit depends on the host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "host_phenotype": { "description": "Phenotype of human or other host. 
Use terms from the phenotypic quality ontology (pato) or the Human Phenotype Ontology (HP)", "type": "string" }, "host_shape": { "description": "Morphological shape of host", "type": "string" }, "host_specificity": { "$ref": "#/$defs/HOSTSPECIFICITYENUM", "description": "Level of specificity of symbiont-host interaction: e.g. generalist (symbiont able to establish associations with distantly related hosts) or species-specific" }, "host_subject_id": { "description": "A unique identifier by which each subject can be referred to, de-identified", "type": "string" }, "host_substrate": { "description": "The growth substrate of the host", "type": "string" }, "host_symbiont": { "description": "The taxonomic name of the organism(s) found living in mutualistic, commensalistic, or parasitic symbiosis with the specific host. The sampled symbiont can have its own symbionts. For example, parasites may have hyperparasites (=parasites of the parasite)", "items": { "type": "string" }, "type": "array" }, "host_taxid": { "description": "NCBI taxon id of the host, e.g. 9606", "type": "string" }, "host_tot_mass": { "description": "Total mass of the host at collection, the unit depends on host", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "lat_lon": { "description": "The geographical origin of the sample as defined by latitude and longitude. 
The values should be reported in decimal degrees and in WGS84 system", "pattern": "^(-?((?:[0-8]?[0-9](?:\\.\\d{0,8})?)|90)) -?[0-9]+(?:\\.[0-9]{0,8})?$|^-?(1[0-7]{1,2})$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "mode_transmission": { "$ref": "#/$defs/MODETRANSMISSIONENUM", "description": "The process through which the symbiotic host organism entered the host from which it was sampled" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "route_transmission": { "$ref": "#/$defs/ROUTETRANSMISSIONENUM", "description": "Description of path taken by the symbiotic host organism being sampled in order to establish a symbiotic relationship with the host (with which it was observed at the time of sampling) via a mode of transmission (specified in mode_transmission)" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. 
While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_sol": { "description": "Solution within which sample was stored, if any", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "source_mat_id": { "description": "A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2)", "items": { "type": "string" }, "type": "array" }, "sym_life_cycle_type": { "$ref": "#/$defs/SYMLIFECYCLETYPEENUM", "description": "Type of life cycle of the symbiotic host species (the thing being sampled). 
Simple life cycles occur within a single host, complex ones within multiple different hosts over the course of their normal life cycle" }, "symbiont_host_role": { "$ref": "#/$defs/SYMBIONTHOSTROLEENUM", "description": "Role of the host in the life cycle of the symbiotic organism" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "type_of_symbiosis": { "$ref": "#/$defs/TYPEOFSYMBIOSISENUM", "description": "Type of biological interaction established between the symbiotic host organism being sampled and its respective host" }, "urobiom_sex": { "$ref": "#/$defs/UROBIOMSEXENUM", "description": "Physical sex of the host" } }, "required": [ "samp_name", "project_name", "lat_lon", "geo_loc_name", "collection_date", "host_dependence", "sym_life_cycle_type", "host_life_stage" ], "title": "SymbiontAssociated", "type": "object" }, "TAXIDENTENUM": { "description": "", "enum": [ "16S rRNA gene", "multi-marker approach", "other" ], "title": "TAXIDENTENUM", "type": "string" }, "TIDALSTAGEENUM": { "description": "", "enum": [ "ebb tide", "flood tide", "high tide", "low tide" ], "title": "TIDALSTAGEENUM", "type": "string" }, "TILLAGEENUM": { "description": "", "enum": [ "chisel", "cutting disc", "disc plough", "drill", "mouldboard", "ridge till", "strip tillage", "tined", "zonal tillage" ], "title": "TILLAGEENUM", "type": "string" }, "TRAINLINEENUM": { "description": "", "enum": [ "green", "orange", "red" ], "title": "TRAINLINEENUM", "type": "string" }, "TRAINSTATLOCENUM": { "description": "", "enum": [ "forest hills", "riverside", "south station above ground", "south station amtrak", "south station underground" ], "title": "TRAINSTATLOCENUM", "type": "string" }, "TRAINSTOPLOCENUM": { "description": "", "enum": [ "downtown", "end", "mid" ], "title": "TRAINSTOPLOCENUM", "type": "string" }, 
"TROPHICLEVELENUM": { "description": "", "enum": [ "autotroph", "carboxydotroph", "chemoautolithotroph", "chemoautotroph", "chemoheterotroph", "chemolithoautotroph", "chemolithotroph", "chemoorganoheterotroph", "chemoorganotroph", "chemosynthetic", "chemotroph", "copiotroph", "diazotroph", "facultative", "heterotroph", "lithoautotroph", "lithoheterotroph", "lithotroph", "methanotroph", "methylotroph", "mixotroph", "obligate", "oligotroph", "organoheterotroph", "organotroph", "photoautotroph", "photoheterotroph", "photolithoautotroph", "photolithotroph", "photosynthetic", "phototroph" ], "title": "TROPHICLEVELENUM", "type": "string" }, "TYPEOFSYMBIOSISENUM": { "description": "", "enum": [ "commensalistic", "mutualistic", "parasitic" ], "title": "TYPEOFSYMBIOSISENUM", "type": "string" }, "URINECOLLECTMETHENUM": { "description": "", "enum": [ "catheter", "clean catch" ], "title": "URINECOLLECTMETHENUM", "type": "string" }, "UROBIOMSEXENUM": { "description": "", "enum": [ "female", "hermaphrodite", "male", "neuter" ], "title": "UROBIOMSEXENUM", "type": "string" }, "VIRUSENRICHAPPRENUM": { "description": "", "enum": [ "CsCl density gradient", "DNAse", "FeCl Precipitation", "PEG Precipitation", "RNAse", "centrifugation", "filtration", "none", "other", "targeted sequence capture", "ultracentrifugation", "ultrafiltration" ], "title": "VIRUSENRICHAPPRENUM", "type": "string" }, "WALLCONSTTYPEENUM": { "description": "", "enum": [ "fire resistive", "frame construction", "joisted masonry", "light noncombustible", "masonry noncombustible", "modified fire resistive" ], "title": "WALLCONSTTYPEENUM", "type": "string" }, "WALLFINISHMATENUM": { "description": "", "enum": [ "acoustical treatment", "gypsum board", "gypsum plaster", "masonry", "metal", "plaster", "stone facing", "terrazzo", "tile", "veneer plaster", "wood" ], "title": "WALLFINISHMATENUM", "type": "string" }, "WALLSURFTREATMENTENUM": { "description": "", "enum": [ "fabric", "no treatment", "painted", "paneling", 
"stucco", "wall paper" ], "title": "WALLSURFTREATMENTENUM", "type": "string" }, "WATERFEATTYPEENUM": { "description": "", "enum": [ "fountain", "pool", "standing feature", "stream", "waterfall" ], "title": "WATERFEATTYPEENUM", "type": "string" }, "WEEKDAYENUM": { "description": "", "enum": [ "Friday", "Monday", "Saturday", "Sunday", "Thursday", "Tuesday", "Wednesday" ], "title": "WEEKDAYENUM", "type": "string" }, "WGAAMPAPPRENUM": { "description": "", "enum": [ "mda based", "pcr based" ], "title": "WGAAMPAPPRENUM", "type": "string" }, "WINDOWCOVERENUM": { "description": "", "enum": [ "blinds", "curtains", "none" ], "title": "WINDOWCOVERENUM", "type": "string" }, "WINDOWHORIZPOSENUM": { "description": "", "enum": [ "left", "middle", "right" ], "title": "WINDOWHORIZPOSENUM", "type": "string" }, "WINDOWMATENUM": { "description": "", "enum": [ "clad", "fiberglass", "metal", "vinyl", "wood" ], "title": "WINDOWMATENUM", "type": "string" }, "WINDOWSTATUSENUM": { "description": "", "enum": [ "closed", "open" ], "title": "WINDOWSTATUSENUM", "type": "string" }, "WINDOWTYPEENUM": { "description": "", "enum": [ "fixed window", "horizontal sash window", "single-hung sash window" ], "title": "WINDOWTYPEENUM", "type": "string" }, "WINDOWVERTPOSENUM": { "description": "", "enum": [ "bottom", "high", "low", "middle", "top" ], "title": "WINDOWVERTPOSENUM", "type": "string" }, "WastewaterSludge": { "additionalProperties": false, "description": "wastewater/sludge extension", "properties": { "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biochem_oxygen_dem": { "description": "Amount of dissolved oxygen needed by aerobic biological organisms in a body of water to break down organic material present in a given water sample at certain temperature over a specific time period", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chem_oxygen_dem": { "description": "A measure of the capacity of water to consume oxygen during the decomposition of organic matter and the oxidation of inorganic chemicals such as ammonia and nitrite", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "efficiency_percent": { "description": "Percentage of volatile solids removed from the anaerobic digestor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "emulsions": { "description": "Amount or concentration of substances such as paints, adhesives, mayonnaise, hair colorants, emulsified oils, etc.; can include multiple emulsion types", "items": { "type": "string" }, "type": "array" }, "gaseous_substances": { "description": "Amount or concentration of substances such as hydrogen sulfide, carbon dioxide, methane, etc.; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "indust_eff_percent": { "description": "Percentage of industrial effluents received by wastewater treatment plant", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "inorg_particles": { "description": "Concentration of particles such as sand, grit, metal particles, ceramics, etc.; can include multiple particles", "items": { "type": "string" }, "type": "array" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_particles": { "description": "Concentration of particles such as faeces, hairs, food, vomit, paper fibers, plant material, humus, etc", "items": { "type": "string" }, "type": "array" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) Should also be provided. 
(example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pre_treatment": { "description": "The process of pre-treatment removes materials that can be easily collected from the raw wastewater", "type": "string" }, "primary_treatment": { "description": "The process to produce both a generally homogeneous liquid capable of being treated biologically and a sludge that can be separately treated or processed", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "reactor_type": { "description": "Anaerobic digesters can be designed and engineered to operate using a number of different process configurations, as batch or continuous, mesophilic, high solid or low solid, and single stage or multistage", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. 
This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "secondary_treatment": { "description": "The process for substantially degrading the biological content of the sewage", "type": "string" }, "sewage_type": { "description": "Type of wastewater treatment plant as municipal or industrial", "type": "string" }, "sludge_retent_time": { "description": "The time activated sludge remains in reactor", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_inorg_mat": { "description": "Concentration of substances such as ammonia, road-salt, sea-salt, cyanide, hydrogen sulfide, thiocyanates, thiosulfates, etc", "items": { "type": "string" }, "type": "array" }, "soluble_org_mat": { "description": "Concentration of substances such as urea, fruit sugars, soluble proteins, drugs, pharmaceuticals, etc", "items": { "type": "string" }, "type": "array" }, "suspend_solids": { "description": "Concentration of substances including a wide variety of material, such as silt, decaying plant and animal matter; can include multiple substances", "items": { "type": "string" }, "type": "array" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tertiary_treatment": { "description": "The process providing a final treatment stage to raise the effluent quality before it is discharged to the receiving environment", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosphate": { "description": "Total amount or concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "wastewater_type": { "description": "The origin of wastewater such as human waste, rainfall, storm drains, etc", "type": "string" } }, "required": [ "samp_name", "project_name" ], "title": "WastewaterSludge", "type": "object" }, "Water": { "additionalProperties": false, "description": "water extension", "properties": { "alkalinity": { "description": "Alkalinity, the ability of a solution to neutralize acids to the equivalence point of carbonate or bicarbonate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "alkalinity_method": { "description": "Method used for alkalinity measurement", "type": "string" }, "alkyl_diethers": { "description": "Concentration of alkyl diethers", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "aminopept_act": { "description": "Measurement of aminopeptidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ammonium": { "description": "Concentration of ammonium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "atmospheric_data": { "description": "Measurement of atmospheric data; can include multiple data", "items": { "type": "string" }, "type": "array" }, "bac_prod": { "description": "Bacterial production in the water column measured by isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bac_resp": { "description": "Measurement of bacterial respiration in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bacteria_carb_prod": { "description": "Measurement of bacterial carbon production", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "biomass": { "description": "Amount of biomass; should include the name for the part of biomass measured, e.g. Microbial, total. Can include multiple measurements", "items": { "type": "string" }, "type": "array" }, "bishomohopanol": { "description": "Concentration of bishomohopanol", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "bromide": { "description": "Concentration of bromide", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "calcium": { "description": "Concentration of calcium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "carb_nitro_ratio": { "description": "Ratio of amount or concentrations of carbon to nitrogen", "type": "number" }, "chem_administration": { "description": "List of chemical compounds administered to the host or site where sampling occurred, and when (e.g. Antibiotics, n fertilizer, air filter); can include multiple compounds. For chemical entities of biological interest ontology (chebi) (v 163), http://purl.bioontology.org/ontology/chebi", "items": { "type": "string" }, "type": "array" }, "chloride": { "description": "Concentration of chloride in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "chlorophyll": { "description": "Concentration of chlorophyll", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "conduc": { "description": "Electrical conductivity of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "density": { "description": "Density of the sample, which is its mass per unit volume (aka volumetric mass density)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "depth": { "description": "The vertical distance below local surface. For sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diether_lipids": { "description": "Concentration of diether lipids; can include multiple types of diether lipids", "items": { "type": "string" }, "type": "array" }, "diss_carb_dioxide": { "description": "Concentration of dissolved carbon dioxide in the sample or liquid portion of the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_hydrogen": { "description": "Concentration of dissolved hydrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_carb": { "description": "Dissolved inorganic carbon concentration in the sample, typically measured after filtering the sample using a 0.45 micrometer filter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_nitro": { "description": "Concentration of dissolved inorganic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_inorg_phosp": { "description": "Concentration of dissolved inorganic phosphorus in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_carb": { "description": "Concentration of dissolved organic carbon in the sample, liquid portion of the sample, or aqueous phase of the fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_org_nitro": { "description": "Dissolved organic nitrogen concentration measured as: total dissolved nitrogen - NH4 - NO3 - NO2", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "diss_oxygen": { "description": "Concentration of dissolved oxygen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "down_par": { "description": "Visible waveband radiance and irradiance measurements in the water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "elev": { "description": "Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "fluor": { "description": "Raw or converted fluorescence of water", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "glucosidase_act": { "description": "Measurement of glucosidase activity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "light_intensity": { "description": "Measurement of light intensity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "magnesium": { "description": "Concentration of magnesium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_frict_vel": { "description": "Measurement of mean friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "mean_peak_frict_vel": { "description": "Measurement of mean peak friction velocity", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "misc_param": { "description": "Any other measurement performed or parameter collected, that is not listed here", "items": { "type": "string" }, "type": "array" }, "n_alkanes": { "description": "Concentration of n-alkanes; can include multiple n-alkanes", "items": { "type": "string" }, "type": "array" }, "nitrate": { "description": "Concentration of nitrate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitrite": { "description": "Concentration of nitrite in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "nitro": { "description": "Concentration of nitrogen (total)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_carb": { "description": "Concentration of organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_matter": { "description": "Concentration of organic matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "org_nitro": { "description": "Concentration of organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "organism_count": { "description": "Total cell count of any organism (or group of organisms) per gram, volume or area of sample, should include name of organism followed by count. The method that was used for the enumeration (e.g. qPCR, atp, mpn, etc.) should also be provided. (example: total prokaryotes; 3.5e7 cells per ml; qpcr)", "items": { "type": "string" }, "type": "array" }, "oxy_stat_samp": { "$ref": "#/$defs/OXYSTATSAMPENUM", "description": "Oxygenation status of sample" }, "part_org_carb": { "description": "Concentration of particulate organic carbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "part_org_nitro": { "description": "Concentration of particulate organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "perturbation": { "description": "Type of perturbation, e.g. chemical administration, physical disturbance, etc., coupled with perturbation regimen including how many times the perturbation was repeated, how long each perturbation lasted, and the start and end time of the entire perturbation period; can include multiple perturbation types", "items": { "type": "string" }, "type": "array" }, "petroleum_hydrocarb": { "description": "Concentration of petroleum hydrocarbon", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "ph": { "description": "pH measurement of the sample, or liquid portion of sample, or aqueous phase of the fluid", "type": "number" }, "phaeopigments": { "description": "Concentration of phaeopigments; can include multiple phaeopigments", "items": { "type": "string" }, "type": "array" }, "phosphate": { "description": "Concentration of phosphate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "phosplipid_fatt_acid": { "description": "Concentration of phospholipid fatty acids; can include multiple values", "items": { "type": "string" }, "type": "array" }, "photon_flux": { "description": "Measurement of photon flux", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "potassium": { "description": "Concentration of potassium in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "pressure": { "description": "Pressure to which the sample is subject, in atmospheres", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "primary_prod": { "description": "Measurement of primary production, generally measured as isotope uptake", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "project_name": { "description": "Name of the project within which the sequencing was organized", "type": "string" }, "redox_potential": { "description": "Redox potential, measured relative to a hydrogen cell, indicating oxidation or reduction potential", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "salinity": { "description": "The total concentration of all dissolved salts in a liquid or solid sample. While salinity can be measured by a complete chemical analysis, this method is difficult and time consuming. More often, it is instead derived from the conductivity measurement. This is known as practical salinity. These derivations compare the specific conductance of the sample to a salinity standard such as seawater", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_name": { "description": "A local identifier or name for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. 
INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name", "type": "string" }, "samp_store_dur": { "description": "Duration for which the sample was stored. Indicate the duration for which the sample was stored written in ISO 8601 format", "pattern": "^P(?:(?:\\d+D|\\d+M(?:\\d+D)?|\\d+Y(?:\\d+M(?:\\d+D)?)?)(?:T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S))?|T(?:\\d+H(?:\\d+M(?:\\d+S)?)?|\\d+M(?:\\d+S)?|\\d+S)|\\d+W)$", "type": "string" }, "samp_store_loc": { "description": "Location at which sample was stored, usually name of a specific freezer/room", "type": "string" }, "samp_store_temp": { "description": "Temperature at which sample was stored, e.g. -80 degree Celsius", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "samp_vol_we_dna_ext": { "description": "Volume (ml) or mass (g) of total collected sample processed for DNA extraction. Note: total sample collected should be entered under the term Sample Size (MIXS:0000001)", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "silicate": { "description": "Concentration of silicate", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_low": { "description": "Refers to the mesh/pore size used to pre-filter/pre-sort the sample. Materials larger than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "size_frac_up": { "description": "Mesh or pore size of the device used to retain the sample. 
Materials smaller than the size threshold are excluded from the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sodium": { "description": "Sodium concentration in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "soluble_react_phosp": { "description": "Concentration of soluble reactive phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfate": { "description": "Concentration of sulfate in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "sulfide": { "description": "Concentration of sulfide in the sample", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "suspend_part_matter": { "description": "Concentration of suspended particulate matter", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "temp": { "description": "Temperature of the sample at the time of sampling", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tidal_stage": { "$ref": "#/$defs/TIDALSTAGEENUM", "description": "Stage of tide" }, "tot_depth_water_col": { "description": "Measurement of total depth of water column", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_diss_nitro": { "description": "Total dissolved nitrogen concentration, reported as nitrogen, measured by: total dissolved nitrogen = NH4 + NO3NO2 + dissolved organic nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_inorg_nitro": { "description": "Total inorganic nitrogen content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_nitro": { "description": "Total nitrogen concentration of water samples, calculated by: total nitrogen = total dissolved nitrogen + particulate nitrogen. Can also be measured without filtering, reported as nitrogen", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_part_carb": { "description": "Total particulate carbon content", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "tot_phosp": { "description": "Total phosphorus concentration in the sample, calculated by: total phosphorus = total dissolved phosphorus + particulate phosphorus", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "turbidity": { "description": "Measure of the amount of cloudiness or haziness in water caused by individual particles", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? 
*([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" }, "water_current": { "description": "Measurement of magnitude and direction of flow within a fluid", "pattern": "^[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?( *- *[-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?)? *([^\\s-]{1,2}|[^\\s-]+.+[^\\s-]+)$", "type": "string" } }, "required": [ "samp_name", "project_name", "depth" ], "title": "Water", "type": "object" } }, "$id": "https://w3id.org/mixs", "$schema": "http://json-schema.org/draft-07/schema#", "additionalProperties": true, "description": "A collection of data that complies with some combination of a MIxS checklist and environmental extension", "metamodel_version": "1.7.0", "properties": { "agriculture_data": { "description": "Data that complies with Extension Agriculture", "items": { "$ref": "#/$defs/Agriculture" }, "type": "array" }, "air_data": { "description": "Data that complies with Extension Air", "items": { "$ref": "#/$defs/Air" }, "type": "array" }, "built_environment_data": { "description": "Data that complies with Extension BuiltEnvironment", "items": { "$ref": "#/$defs/BuiltEnvironment" }, "type": "array" }, "food_animal_and_animal_feed_data": { "description": "Data that complies with Extension FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/FoodAnimalAndAnimalFeed" }, "type": "array" }, "food_farm_environment_data": { "description": "Data that complies with Extension FoodFarmEnvironment", "items": { "$ref": "#/$defs/FoodFarmEnvironment" }, "type": "array" }, "food_food_production_facility_data": { "description": "Data that complies with Extension FoodFoodProductionFacility", "items": { "$ref": "#/$defs/FoodFoodProductionFacility" }, "type": "array" }, "food_human_foods_data": { "description": "Data that complies with Extension FoodHumanFoods", "items": { "$ref": "#/$defs/FoodHumanFoods" }, "type": "array" }, "host_associated_data": { "description": "Data that complies with Extension HostAssociated", "items": { "$ref": "#/$defs/HostAssociated" }, 
"type": "array" }, "human_associated_data": { "description": "Data that complies with Extension HumanAssociated", "items": { "$ref": "#/$defs/HumanAssociated" }, "type": "array" }, "human_gut_data": { "description": "Data that complies with Extension HumanGut", "items": { "$ref": "#/$defs/HumanGut" }, "type": "array" }, "human_oral_data": { "description": "Data that complies with Extension HumanOral", "items": { "$ref": "#/$defs/HumanOral" }, "type": "array" }, "human_skin_data": { "description": "Data that complies with Extension HumanSkin", "items": { "$ref": "#/$defs/HumanSkin" }, "type": "array" }, "human_vaginal_data": { "description": "Data that complies with Extension HumanVaginal", "items": { "$ref": "#/$defs/HumanVaginal" }, "type": "array" }, "hydrocarbon_resources_cores_data": { "description": "Data that complies with Extension HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/HydrocarbonResourcesCores" }, "type": "array" }, "hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with Extension HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/HydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "microbial_mat_biofilm_data": { "description": "Data that complies with Extension MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MicrobialMatBiofilm" }, "type": "array" }, "migs_ba_agriculture_data": { "description": "Data that complies with MigsBa combined with Agriculture", "items": { "$ref": "#/$defs/MigsBaAgriculture" }, "type": "array" }, "migs_ba_air_data": { "description": "Data that complies with MigsBa combined with Air", "items": { "$ref": "#/$defs/MigsBaAir" }, "type": "array" }, "migs_ba_built_environment_data": { "description": "Data that complies with MigsBa combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MigsBaBuiltEnvironment" }, "type": "array" }, "migs_ba_data": { "description": "Data that complies with checklist MigsBa", "items": { "$ref": "#/$defs/MigsBa" }, "type": "array" }, 
"migs_ba_food_animal_and_animal_feed_data": { "description": "Data that complies with MigsBa combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MigsBaFoodAnimalAndAnimalFeed" }, "type": "array" }, "migs_ba_food_farm_environment_data": { "description": "Data that complies with MigsBa combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MigsBaFoodFarmEnvironment" }, "type": "array" }, "migs_ba_food_food_production_facility_data": { "description": "Data that complies with MigsBa combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MigsBaFoodFoodProductionFacility" }, "type": "array" }, "migs_ba_food_human_foods_data": { "description": "Data that complies with MigsBa combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MigsBaFoodHumanFoods" }, "type": "array" }, "migs_ba_host_associated_data": { "description": "Data that complies with MigsBa combined with HostAssociated", "items": { "$ref": "#/$defs/MigsBaHostAssociated" }, "type": "array" }, "migs_ba_human_associated_data": { "description": "Data that complies with MigsBa combined with HumanAssociated", "items": { "$ref": "#/$defs/MigsBaHumanAssociated" }, "type": "array" }, "migs_ba_human_gut_data": { "description": "Data that complies with MigsBa combined with HumanGut", "items": { "$ref": "#/$defs/MigsBaHumanGut" }, "type": "array" }, "migs_ba_human_oral_data": { "description": "Data that complies with MigsBa combined with HumanOral", "items": { "$ref": "#/$defs/MigsBaHumanOral" }, "type": "array" }, "migs_ba_human_skin_data": { "description": "Data that complies with MigsBa combined with HumanSkin", "items": { "$ref": "#/$defs/MigsBaHumanSkin" }, "type": "array" }, "migs_ba_human_vaginal_data": { "description": "Data that complies with MigsBa combined with HumanVaginal", "items": { "$ref": "#/$defs/MigsBaHumanVaginal" }, "type": "array" }, "migs_ba_hydrocarbon_resources_cores_data": { "description": "Data that complies with MigsBa combined with 
HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MigsBaHydrocarbonResourcesCores" }, "type": "array" }, "migs_ba_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with MigsBa combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MigsBaHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "migs_ba_microbial_mat_biofilm_data": { "description": "Data that complies with MigsBa combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MigsBaMicrobialMatBiofilm" }, "type": "array" }, "migs_ba_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with MigsBa combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MigsBaMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "migs_ba_plant_associated_data": { "description": "Data that complies with MigsBa combined with PlantAssociated", "items": { "$ref": "#/$defs/MigsBaPlantAssociated" }, "type": "array" }, "migs_ba_sediment_data": { "description": "Data that complies with MigsBa combined with Sediment", "items": { "$ref": "#/$defs/MigsBaSediment" }, "type": "array" }, "migs_ba_soil_data": { "description": "Data that complies with MigsBa combined with Soil", "items": { "$ref": "#/$defs/MigsBaSoil" }, "type": "array" }, "migs_ba_symbiont_associated_data": { "description": "Data that complies with MigsBa combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MigsBaSymbiontAssociated" }, "type": "array" }, "migs_ba_wastewater_sludge_data": { "description": "Data that complies with MigsBa combined with WastewaterSludge", "items": { "$ref": "#/$defs/MigsBaWastewaterSludge" }, "type": "array" }, "migs_ba_water_data": { "description": "Data that complies with MigsBa combined with Water", "items": { "$ref": "#/$defs/MigsBaWater" }, "type": "array" }, "migs_eu_agriculture_data": { "description": "Data that complies with MigsEu combined with Agriculture", "items": { "$ref": 
"#/$defs/MigsEuAgriculture" }, "type": "array" }, "migs_eu_air_data": { "description": "Data that complies with MigsEu combined with Air", "items": { "$ref": "#/$defs/MigsEuAir" }, "type": "array" }, "migs_eu_built_environment_data": { "description": "Data that complies with MigsEu combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MigsEuBuiltEnvironment" }, "type": "array" }, "migs_eu_data": { "description": "Data that complies with checklist MigsEu", "items": { "$ref": "#/$defs/MigsEu" }, "type": "array" }, "migs_eu_food_animal_and_animal_feed_data": { "description": "Data that complies with MigsEu combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MigsEuFoodAnimalAndAnimalFeed" }, "type": "array" }, "migs_eu_food_farm_environment_data": { "description": "Data that complies with MigsEu combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MigsEuFoodFarmEnvironment" }, "type": "array" }, "migs_eu_food_food_production_facility_data": { "description": "Data that complies with MigsEu combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MigsEuFoodFoodProductionFacility" }, "type": "array" }, "migs_eu_food_human_foods_data": { "description": "Data that complies with MigsEu combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MigsEuFoodHumanFoods" }, "type": "array" }, "migs_eu_host_associated_data": { "description": "Data that complies with MigsEu combined with HostAssociated", "items": { "$ref": "#/$defs/MigsEuHostAssociated" }, "type": "array" }, "migs_eu_human_associated_data": { "description": "Data that complies with MigsEu combined with HumanAssociated", "items": { "$ref": "#/$defs/MigsEuHumanAssociated" }, "type": "array" }, "migs_eu_human_gut_data": { "description": "Data that complies with MigsEu combined with HumanGut", "items": { "$ref": "#/$defs/MigsEuHumanGut" }, "type": "array" }, "migs_eu_human_oral_data": { "description": "Data that complies with MigsEu combined with HumanOral", "items": { 
"$ref": "#/$defs/MigsEuHumanOral" }, "type": "array" }, "migs_eu_human_skin_data": { "description": "Data that complies with MigsEu combined with HumanSkin", "items": { "$ref": "#/$defs/MigsEuHumanSkin" }, "type": "array" }, "migs_eu_human_vaginal_data": { "description": "Data that complies with MigsEu combined with HumanVaginal", "items": { "$ref": "#/$defs/MigsEuHumanVaginal" }, "type": "array" }, "migs_eu_hydrocarbon_resources_cores_data": { "description": "Data that complies with MigsEu combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MigsEuHydrocarbonResourcesCores" }, "type": "array" }, "migs_eu_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with MigsEu combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MigsEuHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "migs_eu_microbial_mat_biofilm_data": { "description": "Data that complies with MigsEu combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MigsEuMicrobialMatBiofilm" }, "type": "array" }, "migs_eu_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with MigsEu combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MigsEuMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "migs_eu_plant_associated_data": { "description": "Data that complies with MigsEu combined with PlantAssociated", "items": { "$ref": "#/$defs/MigsEuPlantAssociated" }, "type": "array" }, "migs_eu_sediment_data": { "description": "Data that complies with MigsEu combined with Sediment", "items": { "$ref": "#/$defs/MigsEuSediment" }, "type": "array" }, "migs_eu_soil_data": { "description": "Data that complies with MigsEu combined with Soil", "items": { "$ref": "#/$defs/MigsEuSoil" }, "type": "array" }, "migs_eu_symbiont_associated_data": { "description": "Data that complies with MigsEu combined with SymbiontAssociated", "items": { "$ref": 
"#/$defs/MigsEuSymbiontAssociated" }, "type": "array" }, "migs_eu_wastewater_sludge_data": { "description": "Data that complies with MigsEu combined with WastewaterSludge", "items": { "$ref": "#/$defs/MigsEuWastewaterSludge" }, "type": "array" }, "migs_eu_water_data": { "description": "Data that complies with MigsEu combined with Water", "items": { "$ref": "#/$defs/MigsEuWater" }, "type": "array" }, "migs_org_agriculture_data": { "description": "Data that complies with MigsOrg combined with Agriculture", "items": { "$ref": "#/$defs/MigsOrgAgriculture" }, "type": "array" }, "migs_org_air_data": { "description": "Data that complies with MigsOrg combined with Air", "items": { "$ref": "#/$defs/MigsOrgAir" }, "type": "array" }, "migs_org_built_environment_data": { "description": "Data that complies with MigsOrg combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MigsOrgBuiltEnvironment" }, "type": "array" }, "migs_org_data": { "description": "Data that complies with checklist MigsOrg", "items": { "$ref": "#/$defs/MigsOrg" }, "type": "array" }, "migs_org_food_animal_and_animal_feed_data": { "description": "Data that complies with MigsOrg combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MigsOrgFoodAnimalAndAnimalFeed" }, "type": "array" }, "migs_org_food_farm_environment_data": { "description": "Data that complies with MigsOrg combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MigsOrgFoodFarmEnvironment" }, "type": "array" }, "migs_org_food_food_production_facility_data": { "description": "Data that complies with MigsOrg combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MigsOrgFoodFoodProductionFacility" }, "type": "array" }, "migs_org_food_human_foods_data": { "description": "Data that complies with MigsOrg combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MigsOrgFoodHumanFoods" }, "type": "array" }, "migs_org_host_associated_data": { "description": "Data that complies with MigsOrg combined with 
HostAssociated", "items": { "$ref": "#/$defs/MigsOrgHostAssociated" }, "type": "array" }, "migs_org_human_associated_data": { "description": "Data that complies with MigsOrg combined with HumanAssociated", "items": { "$ref": "#/$defs/MigsOrgHumanAssociated" }, "type": "array" }, "migs_org_human_gut_data": { "description": "Data that complies with MigsOrg combined with HumanGut", "items": { "$ref": "#/$defs/MigsOrgHumanGut" }, "type": "array" }, "migs_org_human_oral_data": { "description": "Data that complies with MigsOrg combined with HumanOral", "items": { "$ref": "#/$defs/MigsOrgHumanOral" }, "type": "array" }, "migs_org_human_skin_data": { "description": "Data that complies with MigsOrg combined with HumanSkin", "items": { "$ref": "#/$defs/MigsOrgHumanSkin" }, "type": "array" }, "migs_org_human_vaginal_data": { "description": "Data that complies with MigsOrg combined with HumanVaginal", "items": { "$ref": "#/$defs/MigsOrgHumanVaginal" }, "type": "array" }, "migs_org_hydrocarbon_resources_cores_data": { "description": "Data that complies with MigsOrg combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MigsOrgHydrocarbonResourcesCores" }, "type": "array" }, "migs_org_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with MigsOrg combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MigsOrgHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "migs_org_microbial_mat_biofilm_data": { "description": "Data that complies with MigsOrg combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MigsOrgMicrobialMatBiofilm" }, "type": "array" }, "migs_org_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with MigsOrg combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MigsOrgMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "migs_org_plant_associated_data": { "description": "Data that complies with MigsOrg 
combined with PlantAssociated", "items": { "$ref": "#/$defs/MigsOrgPlantAssociated" }, "type": "array" }, "migs_org_sediment_data": { "description": "Data that complies with MigsOrg combined with Sediment", "items": { "$ref": "#/$defs/MigsOrgSediment" }, "type": "array" }, "migs_org_soil_data": { "description": "Data that complies with MigsOrg combined with Soil", "items": { "$ref": "#/$defs/MigsOrgSoil" }, "type": "array" }, "migs_org_symbiont_associated_data": { "description": "Data that complies with MigsOrg combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MigsOrgSymbiontAssociated" }, "type": "array" }, "migs_org_wastewater_sludge_data": { "description": "Data that complies with MigsOrg combined with WastewaterSludge", "items": { "$ref": "#/$defs/MigsOrgWastewaterSludge" }, "type": "array" }, "migs_org_water_data": { "description": "Data that complies with MigsOrg combined with Water", "items": { "$ref": "#/$defs/MigsOrgWater" }, "type": "array" }, "migs_pl_agriculture_data": { "description": "Data that complies with MigsPl combined with Agriculture", "items": { "$ref": "#/$defs/MigsPlAgriculture" }, "type": "array" }, "migs_pl_air_data": { "description": "Data that complies with MigsPl combined with Air", "items": { "$ref": "#/$defs/MigsPlAir" }, "type": "array" }, "migs_pl_built_environment_data": { "description": "Data that complies with MigsPl combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MigsPlBuiltEnvironment" }, "type": "array" }, "migs_pl_data": { "description": "Data that complies with checklist MigsPl", "items": { "$ref": "#/$defs/MigsPl" }, "type": "array" }, "migs_pl_food_animal_and_animal_feed_data": { "description": "Data that complies with MigsPl combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MigsPlFoodAnimalAndAnimalFeed" }, "type": "array" }, "migs_pl_food_farm_environment_data": { "description": "Data that complies with MigsPl combined with FoodFarmEnvironment", "items": { "$ref": 
"#/$defs/MigsPlFoodFarmEnvironment" }, "type": "array" }, "migs_pl_food_food_production_facility_data": { "description": "Data that complies with MigsPl combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MigsPlFoodFoodProductionFacility" }, "type": "array" }, "migs_pl_food_human_foods_data": { "description": "Data that complies with MigsPl combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MigsPlFoodHumanFoods" }, "type": "array" }, "migs_pl_host_associated_data": { "description": "Data that complies with MigsPl combined with HostAssociated", "items": { "$ref": "#/$defs/MigsPlHostAssociated" }, "type": "array" }, "migs_pl_human_associated_data": { "description": "Data that complies with MigsPl combined with HumanAssociated", "items": { "$ref": "#/$defs/MigsPlHumanAssociated" }, "type": "array" }, "migs_pl_human_gut_data": { "description": "Data that complies with MigsPl combined with HumanGut", "items": { "$ref": "#/$defs/MigsPlHumanGut" }, "type": "array" }, "migs_pl_human_oral_data": { "description": "Data that complies with MigsPl combined with HumanOral", "items": { "$ref": "#/$defs/MigsPlHumanOral" }, "type": "array" }, "migs_pl_human_skin_data": { "description": "Data that complies with MigsPl combined with HumanSkin", "items": { "$ref": "#/$defs/MigsPlHumanSkin" }, "type": "array" }, "migs_pl_human_vaginal_data": { "description": "Data that complies with MigsPl combined with HumanVaginal", "items": { "$ref": "#/$defs/MigsPlHumanVaginal" }, "type": "array" }, "migs_pl_hydrocarbon_resources_cores_data": { "description": "Data that complies with MigsPl combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MigsPlHydrocarbonResourcesCores" }, "type": "array" }, "migs_pl_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with MigsPl combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MigsPlHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, 
"migs_pl_microbial_mat_biofilm_data": { "description": "Data that complies with MigsPl combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MigsPlMicrobialMatBiofilm" }, "type": "array" }, "migs_pl_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with MigsPl combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MigsPlMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "migs_pl_plant_associated_data": { "description": "Data that complies with MigsPl combined with PlantAssociated", "items": { "$ref": "#/$defs/MigsPlPlantAssociated" }, "type": "array" }, "migs_pl_sediment_data": { "description": "Data that complies with MigsPl combined with Sediment", "items": { "$ref": "#/$defs/MigsPlSediment" }, "type": "array" }, "migs_pl_soil_data": { "description": "Data that complies with MigsPl combined with Soil", "items": { "$ref": "#/$defs/MigsPlSoil" }, "type": "array" }, "migs_pl_symbiont_associated_data": { "description": "Data that complies with MigsPl combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MigsPlSymbiontAssociated" }, "type": "array" }, "migs_pl_wastewater_sludge_data": { "description": "Data that complies with MigsPl combined with WastewaterSludge", "items": { "$ref": "#/$defs/MigsPlWastewaterSludge" }, "type": "array" }, "migs_pl_water_data": { "description": "Data that complies with MigsPl combined with Water", "items": { "$ref": "#/$defs/MigsPlWater" }, "type": "array" }, "migs_vi_agriculture_data": { "description": "Data that complies with MigsVi combined with Agriculture", "items": { "$ref": "#/$defs/MigsViAgriculture" }, "type": "array" }, "migs_vi_air_data": { "description": "Data that complies with MigsVi combined with Air", "items": { "$ref": "#/$defs/MigsViAir" }, "type": "array" }, "migs_vi_built_environment_data": { "description": "Data that complies with MigsVi combined with BuiltEnvironment", "items": { "$ref": 
"#/$defs/MigsViBuiltEnvironment" }, "type": "array" }, "migs_vi_data": { "description": "Data that complies with checklist MigsVi", "items": { "$ref": "#/$defs/MigsVi" }, "type": "array" }, "migs_vi_food_animal_and_animal_feed_data": { "description": "Data that complies with MigsVi combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MigsViFoodAnimalAndAnimalFeed" }, "type": "array" }, "migs_vi_food_farm_environment_data": { "description": "Data that complies with MigsVi combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MigsViFoodFarmEnvironment" }, "type": "array" }, "migs_vi_food_food_production_facility_data": { "description": "Data that complies with MigsVi combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MigsViFoodFoodProductionFacility" }, "type": "array" }, "migs_vi_food_human_foods_data": { "description": "Data that complies with MigsVi combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MigsViFoodHumanFoods" }, "type": "array" }, "migs_vi_host_associated_data": { "description": "Data that complies with MigsVi combined with HostAssociated", "items": { "$ref": "#/$defs/MigsViHostAssociated" }, "type": "array" }, "migs_vi_human_associated_data": { "description": "Data that complies with MigsVi combined with HumanAssociated", "items": { "$ref": "#/$defs/MigsViHumanAssociated" }, "type": "array" }, "migs_vi_human_gut_data": { "description": "Data that complies with MigsVi combined with HumanGut", "items": { "$ref": "#/$defs/MigsViHumanGut" }, "type": "array" }, "migs_vi_human_oral_data": { "description": "Data that complies with MigsVi combined with HumanOral", "items": { "$ref": "#/$defs/MigsViHumanOral" }, "type": "array" }, "migs_vi_human_skin_data": { "description": "Data that complies with MigsVi combined with HumanSkin", "items": { "$ref": "#/$defs/MigsViHumanSkin" }, "type": "array" }, "migs_vi_human_vaginal_data": { "description": "Data that complies with MigsVi combined with HumanVaginal", 
"items": { "$ref": "#/$defs/MigsViHumanVaginal" }, "type": "array" }, "migs_vi_hydrocarbon_resources_cores_data": { "description": "Data that complies with MigsVi combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MigsViHydrocarbonResourcesCores" }, "type": "array" }, "migs_vi_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with MigsVi combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MigsViHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "migs_vi_microbial_mat_biofilm_data": { "description": "Data that complies with MigsVi combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MigsViMicrobialMatBiofilm" }, "type": "array" }, "migs_vi_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with MigsVi combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MigsViMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "migs_vi_plant_associated_data": { "description": "Data that complies with MigsVi combined with PlantAssociated", "items": { "$ref": "#/$defs/MigsViPlantAssociated" }, "type": "array" }, "migs_vi_sediment_data": { "description": "Data that complies with MigsVi combined with Sediment", "items": { "$ref": "#/$defs/MigsViSediment" }, "type": "array" }, "migs_vi_soil_data": { "description": "Data that complies with MigsVi combined with Soil", "items": { "$ref": "#/$defs/MigsViSoil" }, "type": "array" }, "migs_vi_symbiont_associated_data": { "description": "Data that complies with MigsVi combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MigsViSymbiontAssociated" }, "type": "array" }, "migs_vi_wastewater_sludge_data": { "description": "Data that complies with MigsVi combined with WastewaterSludge", "items": { "$ref": "#/$defs/MigsViWastewaterSludge" }, "type": "array" }, "migs_vi_water_data": { "description": "Data that complies with MigsVi combined with Water", "items": { 
"$ref": "#/$defs/MigsViWater" }, "type": "array" }, "mimag_agriculture_data": { "description": "Data that complies with Mimag combined with Agriculture", "items": { "$ref": "#/$defs/MimagAgriculture" }, "type": "array" }, "mimag_air_data": { "description": "Data that complies with Mimag combined with Air", "items": { "$ref": "#/$defs/MimagAir" }, "type": "array" }, "mimag_built_environment_data": { "description": "Data that complies with Mimag combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MimagBuiltEnvironment" }, "type": "array" }, "mimag_data": { "description": "Data that complies with checklist Mimag", "items": { "$ref": "#/$defs/Mimag" }, "type": "array" }, "mimag_food_animal_and_animal_feed_data": { "description": "Data that complies with Mimag combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MimagFoodAnimalAndAnimalFeed" }, "type": "array" }, "mimag_food_farm_environment_data": { "description": "Data that complies with Mimag combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MimagFoodFarmEnvironment" }, "type": "array" }, "mimag_food_food_production_facility_data": { "description": "Data that complies with Mimag combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MimagFoodFoodProductionFacility" }, "type": "array" }, "mimag_food_human_foods_data": { "description": "Data that complies with Mimag combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MimagFoodHumanFoods" }, "type": "array" }, "mimag_host_associated_data": { "description": "Data that complies with Mimag combined with HostAssociated", "items": { "$ref": "#/$defs/MimagHostAssociated" }, "type": "array" }, "mimag_human_associated_data": { "description": "Data that complies with Mimag combined with HumanAssociated", "items": { "$ref": "#/$defs/MimagHumanAssociated" }, "type": "array" }, "mimag_human_gut_data": { "description": "Data that complies with Mimag combined with HumanGut", "items": { "$ref": "#/$defs/MimagHumanGut" }, 
"type": "array" }, "mimag_human_oral_data": { "description": "Data that complies with Mimag combined with HumanOral", "items": { "$ref": "#/$defs/MimagHumanOral" }, "type": "array" }, "mimag_human_skin_data": { "description": "Data that complies with Mimag combined with HumanSkin", "items": { "$ref": "#/$defs/MimagHumanSkin" }, "type": "array" }, "mimag_human_vaginal_data": { "description": "Data that complies with Mimag combined with HumanVaginal", "items": { "$ref": "#/$defs/MimagHumanVaginal" }, "type": "array" }, "mimag_hydrocarbon_resources_cores_data": { "description": "Data that complies with Mimag combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MimagHydrocarbonResourcesCores" }, "type": "array" }, "mimag_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with Mimag combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MimagHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "mimag_microbial_mat_biofilm_data": { "description": "Data that complies with Mimag combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MimagMicrobialMatBiofilm" }, "type": "array" }, "mimag_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with Mimag combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MimagMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "mimag_plant_associated_data": { "description": "Data that complies with Mimag combined with PlantAssociated", "items": { "$ref": "#/$defs/MimagPlantAssociated" }, "type": "array" }, "mimag_sediment_data": { "description": "Data that complies with Mimag combined with Sediment", "items": { "$ref": "#/$defs/MimagSediment" }, "type": "array" }, "mimag_soil_data": { "description": "Data that complies with Mimag combined with Soil", "items": { "$ref": "#/$defs/MimagSoil" }, "type": "array" }, "mimag_symbiont_associated_data": { "description": "Data that complies 
with Mimag combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MimagSymbiontAssociated" }, "type": "array" }, "mimag_wastewater_sludge_data": { "description": "Data that complies with Mimag combined with WastewaterSludge", "items": { "$ref": "#/$defs/MimagWastewaterSludge" }, "type": "array" }, "mimag_water_data": { "description": "Data that complies with Mimag combined with Water", "items": { "$ref": "#/$defs/MimagWater" }, "type": "array" }, "mimarks_c_agriculture_data": { "description": "Data that complies with MimarksC combined with Agriculture", "items": { "$ref": "#/$defs/MimarksCAgriculture" }, "type": "array" }, "mimarks_c_air_data": { "description": "Data that complies with MimarksC combined with Air", "items": { "$ref": "#/$defs/MimarksCAir" }, "type": "array" }, "mimarks_c_built_environment_data": { "description": "Data that complies with MimarksC combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MimarksCBuiltEnvironment" }, "type": "array" }, "mimarks_c_data": { "description": "Data that complies with checklist MimarksC", "items": { "$ref": "#/$defs/MimarksC" }, "type": "array" }, "mimarks_c_food_animal_and_animal_feed_data": { "description": "Data that complies with MimarksC combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MimarksCFoodAnimalAndAnimalFeed" }, "type": "array" }, "mimarks_c_food_farm_environment_data": { "description": "Data that complies with MimarksC combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MimarksCFoodFarmEnvironment" }, "type": "array" }, "mimarks_c_food_food_production_facility_data": { "description": "Data that complies with MimarksC combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MimarksCFoodFoodProductionFacility" }, "type": "array" }, "mimarks_c_food_human_foods_data": { "description": "Data that complies with MimarksC combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MimarksCFoodHumanFoods" }, "type": "array" }, 
"mimarks_c_host_associated_data": { "description": "Data that complies with MimarksC combined with HostAssociated", "items": { "$ref": "#/$defs/MimarksCHostAssociated" }, "type": "array" }, "mimarks_c_human_associated_data": { "description": "Data that complies with MimarksC combined with HumanAssociated", "items": { "$ref": "#/$defs/MimarksCHumanAssociated" }, "type": "array" }, "mimarks_c_human_gut_data": { "description": "Data that complies with MimarksC combined with HumanGut", "items": { "$ref": "#/$defs/MimarksCHumanGut" }, "type": "array" }, "mimarks_c_human_oral_data": { "description": "Data that complies with MimarksC combined with HumanOral", "items": { "$ref": "#/$defs/MimarksCHumanOral" }, "type": "array" }, "mimarks_c_human_skin_data": { "description": "Data that complies with MimarksC combined with HumanSkin", "items": { "$ref": "#/$defs/MimarksCHumanSkin" }, "type": "array" }, "mimarks_c_human_vaginal_data": { "description": "Data that complies with MimarksC combined with HumanVaginal", "items": { "$ref": "#/$defs/MimarksCHumanVaginal" }, "type": "array" }, "mimarks_c_hydrocarbon_resources_cores_data": { "description": "Data that complies with MimarksC combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MimarksCHydrocarbonResourcesCores" }, "type": "array" }, "mimarks_c_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with MimarksC combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MimarksCHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "mimarks_c_microbial_mat_biofilm_data": { "description": "Data that complies with MimarksC combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MimarksCMicrobialMatBiofilm" }, "type": "array" }, "mimarks_c_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with MimarksC combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": 
"#/$defs/MimarksCMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "mimarks_c_plant_associated_data": { "description": "Data that complies with MimarksC combined with PlantAssociated", "items": { "$ref": "#/$defs/MimarksCPlantAssociated" }, "type": "array" }, "mimarks_c_sediment_data": { "description": "Data that complies with MimarksC combined with Sediment", "items": { "$ref": "#/$defs/MimarksCSediment" }, "type": "array" }, "mimarks_c_soil_data": { "description": "Data that complies with MimarksC combined with Soil", "items": { "$ref": "#/$defs/MimarksCSoil" }, "type": "array" }, "mimarks_c_symbiont_associated_data": { "description": "Data that complies with MimarksC combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MimarksCSymbiontAssociated" }, "type": "array" }, "mimarks_c_wastewater_sludge_data": { "description": "Data that complies with MimarksC combined with WastewaterSludge", "items": { "$ref": "#/$defs/MimarksCWastewaterSludge" }, "type": "array" }, "mimarks_c_water_data": { "description": "Data that complies with MimarksC combined with Water", "items": { "$ref": "#/$defs/MimarksCWater" }, "type": "array" }, "mimarks_s_agriculture_data": { "description": "Data that complies with MimarksS combined with Agriculture", "items": { "$ref": "#/$defs/MimarksSAgriculture" }, "type": "array" }, "mimarks_s_air_data": { "description": "Data that complies with MimarksS combined with Air", "items": { "$ref": "#/$defs/MimarksSAir" }, "type": "array" }, "mimarks_s_built_environment_data": { "description": "Data that complies with MimarksS combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MimarksSBuiltEnvironment" }, "type": "array" }, "mimarks_s_data": { "description": "Data that complies with checklist MimarksS", "items": { "$ref": "#/$defs/MimarksS" }, "type": "array" }, "mimarks_s_food_animal_and_animal_feed_data": { "description": "Data that complies with MimarksS combined with FoodAnimalAndAnimalFeed", "items": { "$ref": 
"#/$defs/MimarksSFoodAnimalAndAnimalFeed" }, "type": "array" }, "mimarks_s_food_farm_environment_data": { "description": "Data that complies with MimarksS combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MimarksSFoodFarmEnvironment" }, "type": "array" }, "mimarks_s_food_food_production_facility_data": { "description": "Data that complies with MimarksS combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MimarksSFoodFoodProductionFacility" }, "type": "array" }, "mimarks_s_food_human_foods_data": { "description": "Data that complies with MimarksS combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MimarksSFoodHumanFoods" }, "type": "array" }, "mimarks_s_host_associated_data": { "description": "Data that complies with MimarksS combined with HostAssociated", "items": { "$ref": "#/$defs/MimarksSHostAssociated" }, "type": "array" }, "mimarks_s_human_associated_data": { "description": "Data that complies with MimarksS combined with HumanAssociated", "items": { "$ref": "#/$defs/MimarksSHumanAssociated" }, "type": "array" }, "mimarks_s_human_gut_data": { "description": "Data that complies with MimarksS combined with HumanGut", "items": { "$ref": "#/$defs/MimarksSHumanGut" }, "type": "array" }, "mimarks_s_human_oral_data": { "description": "Data that complies with MimarksS combined with HumanOral", "items": { "$ref": "#/$defs/MimarksSHumanOral" }, "type": "array" }, "mimarks_s_human_skin_data": { "description": "Data that complies with MimarksS combined with HumanSkin", "items": { "$ref": "#/$defs/MimarksSHumanSkin" }, "type": "array" }, "mimarks_s_human_vaginal_data": { "description": "Data that complies with MimarksS combined with HumanVaginal", "items": { "$ref": "#/$defs/MimarksSHumanVaginal" }, "type": "array" }, "mimarks_s_hydrocarbon_resources_cores_data": { "description": "Data that complies with MimarksS combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MimarksSHydrocarbonResourcesCores" }, "type": "array" 
}, "mimarks_s_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with MimarksS combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MimarksSHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "mimarks_s_microbial_mat_biofilm_data": { "description": "Data that complies with MimarksS combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MimarksSMicrobialMatBiofilm" }, "type": "array" }, "mimarks_s_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with MimarksS combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MimarksSMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "mimarks_s_plant_associated_data": { "description": "Data that complies with MimarksS combined with PlantAssociated", "items": { "$ref": "#/$defs/MimarksSPlantAssociated" }, "type": "array" }, "mimarks_s_sediment_data": { "description": "Data that complies with MimarksS combined with Sediment", "items": { "$ref": "#/$defs/MimarksSSediment" }, "type": "array" }, "mimarks_s_soil_data": { "description": "Data that complies with MimarksS combined with Soil", "items": { "$ref": "#/$defs/MimarksSSoil" }, "type": "array" }, "mimarks_s_symbiont_associated_data": { "description": "Data that complies with MimarksS combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MimarksSSymbiontAssociated" }, "type": "array" }, "mimarks_s_wastewater_sludge_data": { "description": "Data that complies with MimarksS combined with WastewaterSludge", "items": { "$ref": "#/$defs/MimarksSWastewaterSludge" }, "type": "array" }, "mimarks_s_water_data": { "description": "Data that complies with MimarksS combined with Water", "items": { "$ref": "#/$defs/MimarksSWater" }, "type": "array" }, "mims_agriculture_data": { "description": "Data that complies with Mims combined with Agriculture", "items": { "$ref": "#/$defs/MimsAgriculture" }, "type": "array" }, "mims_air_data": 
{ "description": "Data that complies with Mims combined with Air", "items": { "$ref": "#/$defs/MimsAir" }, "type": "array" }, "mims_built_environment_data": { "description": "Data that complies with Mims combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MimsBuiltEnvironment" }, "type": "array" }, "mims_data": { "description": "Data that complies with checklist Mims", "items": { "$ref": "#/$defs/Mims" }, "type": "array" }, "mims_food_animal_and_animal_feed_data": { "description": "Data that complies with Mims combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MimsFoodAnimalAndAnimalFeed" }, "type": "array" }, "mims_food_farm_environment_data": { "description": "Data that complies with Mims combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MimsFoodFarmEnvironment" }, "type": "array" }, "mims_food_food_production_facility_data": { "description": "Data that complies with Mims combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MimsFoodFoodProductionFacility" }, "type": "array" }, "mims_food_human_foods_data": { "description": "Data that complies with Mims combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MimsFoodHumanFoods" }, "type": "array" }, "mims_host_associated_data": { "description": "Data that complies with Mims combined with HostAssociated", "items": { "$ref": "#/$defs/MimsHostAssociated" }, "type": "array" }, "mims_human_associated_data": { "description": "Data that complies with Mims combined with HumanAssociated", "items": { "$ref": "#/$defs/MimsHumanAssociated" }, "type": "array" }, "mims_human_gut_data": { "description": "Data that complies with Mims combined with HumanGut", "items": { "$ref": "#/$defs/MimsHumanGut" }, "type": "array" }, "mims_human_oral_data": { "description": "Data that complies with Mims combined with HumanOral", "items": { "$ref": "#/$defs/MimsHumanOral" }, "type": "array" }, "mims_human_skin_data": { "description": "Data that complies with Mims combined with 
HumanSkin", "items": { "$ref": "#/$defs/MimsHumanSkin" }, "type": "array" }, "mims_human_vaginal_data": { "description": "Data that complies with Mims combined with HumanVaginal", "items": { "$ref": "#/$defs/MimsHumanVaginal" }, "type": "array" }, "mims_hydrocarbon_resources_cores_data": { "description": "Data that complies with Mims combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MimsHydrocarbonResourcesCores" }, "type": "array" }, "mims_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with Mims combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MimsHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "mims_microbial_mat_biofilm_data": { "description": "Data that complies with Mims combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MimsMicrobialMatBiofilm" }, "type": "array" }, "mims_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with Mims combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MimsMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "mims_plant_associated_data": { "description": "Data that complies with Mims combined with PlantAssociated", "items": { "$ref": "#/$defs/MimsPlantAssociated" }, "type": "array" }, "mims_sediment_data": { "description": "Data that complies with Mims combined with Sediment", "items": { "$ref": "#/$defs/MimsSediment" }, "type": "array" }, "mims_soil_data": { "description": "Data that complies with Mims combined with Soil", "items": { "$ref": "#/$defs/MimsSoil" }, "type": "array" }, "mims_symbiont_associated_data": { "description": "Data that complies with Mims combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MimsSymbiontAssociated" }, "type": "array" }, "mims_wastewater_sludge_data": { "description": "Data that complies with Mims combined with WastewaterSludge", "items": { "$ref": "#/$defs/MimsWastewaterSludge" }, "type": 
"array" }, "mims_water_data": { "description": "Data that complies with Mims combined with Water", "items": { "$ref": "#/$defs/MimsWater" }, "type": "array" }, "misag_agriculture_data": { "description": "Data that complies with Misag combined with Agriculture", "items": { "$ref": "#/$defs/MisagAgriculture" }, "type": "array" }, "misag_air_data": { "description": "Data that complies with Misag combined with Air", "items": { "$ref": "#/$defs/MisagAir" }, "type": "array" }, "misag_built_environment_data": { "description": "Data that complies with Misag combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MisagBuiltEnvironment" }, "type": "array" }, "misag_data": { "description": "Data that complies with checklist Misag", "items": { "$ref": "#/$defs/Misag" }, "type": "array" }, "misag_food_animal_and_animal_feed_data": { "description": "Data that complies with Misag combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MisagFoodAnimalAndAnimalFeed" }, "type": "array" }, "misag_food_farm_environment_data": { "description": "Data that complies with Misag combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MisagFoodFarmEnvironment" }, "type": "array" }, "misag_food_food_production_facility_data": { "description": "Data that complies with Misag combined with FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MisagFoodFoodProductionFacility" }, "type": "array" }, "misag_food_human_foods_data": { "description": "Data that complies with Misag combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MisagFoodHumanFoods" }, "type": "array" }, "misag_host_associated_data": { "description": "Data that complies with Misag combined with HostAssociated", "items": { "$ref": "#/$defs/MisagHostAssociated" }, "type": "array" }, "misag_human_associated_data": { "description": "Data that complies with Misag combined with HumanAssociated", "items": { "$ref": "#/$defs/MisagHumanAssociated" }, "type": "array" }, "misag_human_gut_data": { 
"description": "Data that complies with Misag combined with HumanGut", "items": { "$ref": "#/$defs/MisagHumanGut" }, "type": "array" }, "misag_human_oral_data": { "description": "Data that complies with Misag combined with HumanOral", "items": { "$ref": "#/$defs/MisagHumanOral" }, "type": "array" }, "misag_human_skin_data": { "description": "Data that complies with Misag combined with HumanSkin", "items": { "$ref": "#/$defs/MisagHumanSkin" }, "type": "array" }, "misag_human_vaginal_data": { "description": "Data that complies with Misag combined with HumanVaginal", "items": { "$ref": "#/$defs/MisagHumanVaginal" }, "type": "array" }, "misag_hydrocarbon_resources_cores_data": { "description": "Data that complies with Misag combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MisagHydrocarbonResourcesCores" }, "type": "array" }, "misag_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with Misag combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MisagHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "misag_microbial_mat_biofilm_data": { "description": "Data that complies with Misag combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MisagMicrobialMatBiofilm" }, "type": "array" }, "misag_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with Misag combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MisagMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "misag_plant_associated_data": { "description": "Data that complies with Misag combined with PlantAssociated", "items": { "$ref": "#/$defs/MisagPlantAssociated" }, "type": "array" }, "misag_sediment_data": { "description": "Data that complies with Misag combined with Sediment", "items": { "$ref": "#/$defs/MisagSediment" }, "type": "array" }, "misag_soil_data": { "description": "Data that complies with Misag combined with Soil", "items": { 
"$ref": "#/$defs/MisagSoil" }, "type": "array" }, "misag_symbiont_associated_data": { "description": "Data that complies with Misag combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MisagSymbiontAssociated" }, "type": "array" }, "misag_wastewater_sludge_data": { "description": "Data that complies with Misag combined with WastewaterSludge", "items": { "$ref": "#/$defs/MisagWastewaterSludge" }, "type": "array" }, "misag_water_data": { "description": "Data that complies with Misag combined with Water", "items": { "$ref": "#/$defs/MisagWater" }, "type": "array" }, "miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with Extension MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "miuvig_agriculture_data": { "description": "Data that complies with Miuvig combined with Agriculture", "items": { "$ref": "#/$defs/MiuvigAgriculture" }, "type": "array" }, "miuvig_air_data": { "description": "Data that complies with Miuvig combined with Air", "items": { "$ref": "#/$defs/MiuvigAir" }, "type": "array" }, "miuvig_built_environment_data": { "description": "Data that complies with Miuvig combined with BuiltEnvironment", "items": { "$ref": "#/$defs/MiuvigBuiltEnvironment" }, "type": "array" }, "miuvig_data": { "description": "Data that complies with checklist Miuvig", "items": { "$ref": "#/$defs/Miuvig" }, "type": "array" }, "miuvig_food_animal_and_animal_feed_data": { "description": "Data that complies with Miuvig combined with FoodAnimalAndAnimalFeed", "items": { "$ref": "#/$defs/MiuvigFoodAnimalAndAnimalFeed" }, "type": "array" }, "miuvig_food_farm_environment_data": { "description": "Data that complies with Miuvig combined with FoodFarmEnvironment", "items": { "$ref": "#/$defs/MiuvigFoodFarmEnvironment" }, "type": "array" }, "miuvig_food_food_production_facility_data": { "description": "Data that complies with Miuvig combined with 
FoodFoodProductionFacility", "items": { "$ref": "#/$defs/MiuvigFoodFoodProductionFacility" }, "type": "array" }, "miuvig_food_human_foods_data": { "description": "Data that complies with Miuvig combined with FoodHumanFoods", "items": { "$ref": "#/$defs/MiuvigFoodHumanFoods" }, "type": "array" }, "miuvig_host_associated_data": { "description": "Data that complies with Miuvig combined with HostAssociated", "items": { "$ref": "#/$defs/MiuvigHostAssociated" }, "type": "array" }, "miuvig_human_associated_data": { "description": "Data that complies with Miuvig combined with HumanAssociated", "items": { "$ref": "#/$defs/MiuvigHumanAssociated" }, "type": "array" }, "miuvig_human_gut_data": { "description": "Data that complies with Miuvig combined with HumanGut", "items": { "$ref": "#/$defs/MiuvigHumanGut" }, "type": "array" }, "miuvig_human_oral_data": { "description": "Data that complies with Miuvig combined with HumanOral", "items": { "$ref": "#/$defs/MiuvigHumanOral" }, "type": "array" }, "miuvig_human_skin_data": { "description": "Data that complies with Miuvig combined with HumanSkin", "items": { "$ref": "#/$defs/MiuvigHumanSkin" }, "type": "array" }, "miuvig_human_vaginal_data": { "description": "Data that complies with Miuvig combined with HumanVaginal", "items": { "$ref": "#/$defs/MiuvigHumanVaginal" }, "type": "array" }, "miuvig_hydrocarbon_resources_cores_data": { "description": "Data that complies with Miuvig combined with HydrocarbonResourcesCores", "items": { "$ref": "#/$defs/MiuvigHydrocarbonResourcesCores" }, "type": "array" }, "miuvig_hydrocarbon_resources_fluids_swabs_data": { "description": "Data that complies with Miuvig combined with HydrocarbonResourcesFluidsSwabs", "items": { "$ref": "#/$defs/MiuvigHydrocarbonResourcesFluidsSwabs" }, "type": "array" }, "miuvig_microbial_mat_biofilm_data": { "description": "Data that complies with Miuvig combined with MicrobialMatBiofilm", "items": { "$ref": "#/$defs/MiuvigMicrobialMatBiofilm" }, "type": "array" }, 
"miuvig_miscellaneous_natural_or_artificial_environment_data": { "description": "Data that complies with Miuvig combined with MiscellaneousNaturalOrArtificialEnvironment", "items": { "$ref": "#/$defs/MiuvigMiscellaneousNaturalOrArtificialEnvironment" }, "type": "array" }, "miuvig_plant_associated_data": { "description": "Data that complies with Miuvig combined with PlantAssociated", "items": { "$ref": "#/$defs/MiuvigPlantAssociated" }, "type": "array" }, "miuvig_sediment_data": { "description": "Data that complies with Miuvig combined with Sediment", "items": { "$ref": "#/$defs/MiuvigSediment" }, "type": "array" }, "miuvig_soil_data": { "description": "Data that complies with Miuvig combined with Soil", "items": { "$ref": "#/$defs/MiuvigSoil" }, "type": "array" }, "miuvig_symbiont_associated_data": { "description": "Data that complies with Miuvig combined with SymbiontAssociated", "items": { "$ref": "#/$defs/MiuvigSymbiontAssociated" }, "type": "array" }, "miuvig_wastewater_sludge_data": { "description": "Data that complies with Miuvig combined with WastewaterSludge", "items": { "$ref": "#/$defs/MiuvigWastewaterSludge" }, "type": "array" }, "miuvig_water_data": { "description": "Data that complies with Miuvig combined with Water", "items": { "$ref": "#/$defs/MiuvigWater" }, "type": "array" }, "plant_associated_data": { "description": "Data that complies with Extension PlantAssociated", "items": { "$ref": "#/$defs/PlantAssociated" }, "type": "array" }, "sediment_data": { "description": "Data that complies with Extension Sediment", "items": { "$ref": "#/$defs/Sediment" }, "type": "array" }, "soil_data": { "description": "Data that complies with Extension Soil", "items": { "$ref": "#/$defs/Soil" }, "type": "array" }, "symbiont_associated_data": { "description": "Data that complies with Extension SymbiontAssociated", "items": { "$ref": "#/$defs/SymbiontAssociated" }, "type": "array" }, "wastewater_sludge_data": { "description": "Data that complies with Extension 
WastewaterSludge", "items": { "$ref": "#/$defs/WastewaterSludge" }, "type": "array" }, "water_data": { "description": "Data that complies with Extension Water", "items": { "$ref": "#/$defs/Water" }, "type": "array" } }, "title": "mixs", "type": "object", "version": "v6.2.0" }